From e4f180ce5873db809575672fe785ed740517ae14 Mon Sep 17 00:00:00 2001 From: Stuart Slattery Date: Fri, 21 Dec 2018 12:46:40 -0500 Subject: [PATCH 1/3] Adding base communication plan Co-authored-by: dalg24 --- core/src/CMakeLists.txt | 50 +- core/src/Cabana_CommunicationPlan.hpp | 610 ++++++++++++++++++ core/unit_test/CMakeLists.txt | 20 +- .../Cuda/tstCommunicationPlan_Cuda.cpp | 13 + .../OpenMP/tstCommunicationPlan_OpenMP.cpp | 13 + .../Pthread/tstCommunicationPlan_Pthread.cpp | 13 + .../Serial/tstCommunicationPlan_Serial.cpp | 13 + core/unit_test/tstCommunicationPlan.hpp | 535 +++++++++++++++ 8 files changed, 1259 insertions(+), 8 deletions(-) create mode 100644 core/src/Cabana_CommunicationPlan.hpp create mode 100644 core/unit_test/Cuda/tstCommunicationPlan_Cuda.cpp create mode 100644 core/unit_test/OpenMP/tstCommunicationPlan_OpenMP.cpp create mode 100644 core/unit_test/Pthread/tstCommunicationPlan_Pthread.cpp create mode 100644 core/unit_test/Serial/tstCommunicationPlan_Serial.cpp create mode 100644 core/unit_test/tstCommunicationPlan.hpp diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 79127b084..dd5678dee 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -2,21 +2,57 @@ configure_file(CabanaCore_config.hpp.cmakein CabanaCore_config.hpp) #----------------------------------------------------------------------------- -file(GLOB HEADERS_PUBLIC *.hpp) -file(GLOB SOURCES *.cpp) +set(HEADERS_PUBLIC + Cabana_AoSoA.hpp + Cabana_Core.hpp + Cabana_DeepCopy.hpp + Cabana_ExecutionPolicy.hpp + Cabana_LinkedCellList.hpp + Cabana_Macros.hpp + Cabana_MemberTypes.hpp + Cabana_NeighborList.hpp + Cabana_Parallel.hpp + Cabana_Slice.hpp + Cabana_SoA.hpp + Cabana_Sort.hpp + Cabana_Tuple.hpp + Cabana_Types.hpp + Cabana_VerletList.hpp + Cabana_Version.hpp + ) #----------------------------------------------------------------------------- +# MPI-dependent code +if(Cabana_ENABLE_MPI) + list(APPEND HEADERS_PUBLIC + Cabana_CommunicationPlan.hpp + Cabana_Distributor.hpp + Cabana_Halo.hpp + ) +endif() -file(GLOB HEADERS_IMPL impl/*.hpp) -file(GLOB SOURCES_IMPL impl/*.cpp) +#----------------------------------------------------------------------------- +# implementation details +set(HEADERS_IMPL + impl/Cabana_CartesianGrid.hpp + impl/Cabana_Index.hpp + impl/Cabana_IndexSequence.hpp + impl/Cabana_PerformanceTraits.hpp + impl/Cabana_TypeTraits.hpp + ) +set(SOURCES_IMPL + impl/Cabana_Version.cpp + ) + +#----------------------------------------------------------------------------- -install(FILES ${HEADERS_PUBLIC} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(FILES ${HEADERS_PUBLIC} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install(FILES ${HEADERS_IMPL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/impl/) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CabanaCore_config.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/CabanaCore_config.hpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) #----------------------------------------------------------------------------- -add_library(cabanacore ${SOURCES} ${SOURCES} ${SOURCES_IMPL}) +add_library(cabanacore ${SOURCES_IMPL}) target_include_directories(cabanacore PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(cabanacore Kokkos::kokkos) if(Cabana_ENABLE_MPI) diff --git a/core/src/Cabana_CommunicationPlan.hpp b/core/src/Cabana_CommunicationPlan.hpp new file mode 100644 index 000000000..07c1c0abc --- /dev/null +++ b/core/src/Cabana_CommunicationPlan.hpp @@ -0,0 +1,610 @@ 
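As a quick orientation before the new header's contents, here is a minimal sketch of the calling sequence the class is meant to support, pieced together from the API documented below; the Kokkos::HostSpace template argument, the view types, and the example() wrapper are illustrative assumptions rather than part of the patch.

    #include <Cabana_CommunicationPlan.hpp>
    #include <Kokkos_Core.hpp>
    #include <mpi.h>

    // Sketch: every local element is exported back to the calling rank.
    void example( MPI_Comm comm )
    {
        int my_rank = -1;
        MPI_Comm_rank( comm, &my_rank );

        // One destination rank per local element (-1 would mean "do not export").
        Kokkos::View<int*,Kokkos::HostSpace> export_ranks( "export_ranks", 10 );
        Kokkos::deep_copy( export_ranks, my_rank );

        // Build the plan without prior topology knowledge, then the steering
        // vector that groups elements into per-neighbor send blocks.
        Cabana::CommunicationPlan<Kokkos::HostSpace> plan( comm );
        plan.createFromExportsOnly( export_ranks );
        plan.createExportSteering( export_ranks );

        // The import count sizes the receive side of any later data movement.
        std::size_t num_recv = plan.totalNumImport();
        (void) num_recv;
    }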
+/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef CABANA_COMMUNICATIONPLAN_HPP +#define CABANA_COMMUNICATIONPLAN_HPP + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace Cabana +{ +//---------------------------------------------------------------------------// +/*! + \class CommunicationPlan + + \brief Communication plan base class. + + \tparam MemorySpace Memory space in which the data for this class will be + allocated. + + The communication plan computes how to redistribute elements in a parallel + data structure using MPI. Given a list of data elements on the local MPI + rank and their destination ranks, the communication plan computes which rank + each process is sending and receiving from and how many elements we will + send and receive. In addition, it provides an export steering vector which + describes how to pack the local data to be exported into contiguous send + buffers for each destination rank (in the forward communication plan). + + Some nomenclature: + + Export - elements we are sending in the forward communication plan. + + Import - elements we are receiving in the forward communication plan. + + \note If a communication plan does self-sends (i.e. exports and imports data + from its own ranks) then this data is first in the data structure. What this + means is that neighbor 0 is the local rank and the data for that rank that + is being exported will appear first in the steering vector. +*/ +template +class CommunicationPlan +{ + public: + + // Cabana memory space. + using memory_space = MemorySpace; + + // Kokkos execution space. + using execution_space = typename memory_space::execution_space; + + /*! + \brief Constructor. + + \param comm The MPI communicator over which the distributor is defined. + + \return The MPI communicator for this plan. + */ + CommunicationPlan( MPI_Comm comm ) + : _comm( comm ) + {} + + /*! + \brief Get the MPI communicator. + */ + MPI_Comm comm() const + { return _comm; } + + /*! + \brief Get the number of neighbor ranks that this rank will communicate + with. + + \return The number of MPI ranks that will exchange data with this rank. + */ + int numNeighbor() const + { return _neighbors.size(); } + + /*! + \brief Given a local neighbor id get its rank in the MPI communicator. + + \param neighbor The local id of the neighbor to get the rank for. + + \return The MPI rank of the neighbor with the given local id. + */ + int neighborRank( const int neighbor ) const + { return _neighbors[neighbor]; } + + /*! + \brief Get the number of elements this rank will export to a given neighbor. + + \param neighbor The local id of the neighbor to get the number of + exports for. + + \return The number of elements this rank will export to the neighbor with the + given local id. + */ + std::size_t numExport( const int neighbor ) const + { return _num_export[neighbor]; } + + /*! + \brief Get the total number of exports this rank will do. + + \return The total number of elements this rank will export to its + neighbors. 
+ */ + std::size_t totalNumExport() const + { return _total_num_export; } + + /*! + \brief Get the number of elements this rank will import from a given neighbor. + + \param neighbor The local id of the neighbor to get the number of + imports for. + + \return The number of elements this rank will import from the neighbor + with the given local id. + */ + std::size_t numImport( const int neighbor ) const + { return _num_import[neighbor]; } + + /*! + \brief Get the total number of imports this rank will do. + + \return The total number of elements this rank will import from its + neighhbors. + */ + std::size_t totalNumImport() const + { return _total_num_import; } + + /*! + \brief Get the number of export elements. + + Whenever the communciation plan is applied, this is the total number of + elements expected to be input on the sending ranks (in the forward + communication plan). This will be different than the number returned by + totalNumExport() if some of the export ranks used in the construction + are -1 and therefore will not particpate in an export operation. + + \return The number of export elements. + */ + std::size_t exportSize() const + { return _num_export_element; } + + /*! + \brief Get the steering vector for the exports. + + \return The steering vector for the exports. + + The steering vector places exports in contiguous chunks by destination + rank. The chunks are in consecutive order based on the local neighbor id + (i.e. all elements going to neighbor with local id 0 first, then all + elements going to neighbor with local id 1, etc.). + */ + Kokkos::View getExportSteering() const + { return _export_steering; } + + // The functions in the public block below would normally be protected but + // we make them public to allow using private class data in CUDA kernels + // with lambda functions. + public: + + /*! + \brief Neighbor and export rank creator. Use this when you already know + which ranks neighbor each other (i.e. every rank already knows who they + will be sending and receiving from) as it will be more efficient. In + this case you already know the topology of the point-to-point + communication but not how much data to send to and receive from the + neighbors. + + \param element_export_ranks The destination rank in the target + decomposition of each locally owned element in the source + decomposition. Each element will have one unique destination to which it + will be exported. This export rank may be any one of the listed neighbor + ranks which can include the calling rank. An export rank of -1 will + signal that this element is *not* to be exported and will be ignored in + the data migration. The input is expected to be a Kokkos view or Cabana + slice in the same memory space as the communication plan. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \param neighbor_ranks List of ranks this rank will send to and receive + from. This list can include the calling rank. This is effectively a + description of the topology of the point-to-point communication plan. + + \note Calling this function completely updates the state of this object + and invalidates the previous state. + + \note For elements that you do not wish to export, use an export rank of + -1 to signal that this element is *not* to be exported and will be + ignored in the data migration. In other words, this element will be + *completely* removed in the new decomposition. 
If the data is staying on + this rank, just use this rank as the export destination and the data + will be efficiently migrated. + */ + template + void createFromExportsAndTopology( + const ViewType& element_export_ranks, + const std::vector& neighbor_ranks, + const int mpi_tag = 1221 ) + { + // Store the number of export elements. + _num_export_element = element_export_ranks.size(); + + // Store the neighbors. + _neighbors = neighbor_ranks; + int num_n = _neighbors.size(); + + // Get the MPI rank we are currently on. + int my_rank = -1; + MPI_Comm_rank( _comm, &my_rank ); + + // If we are sending to ourself put that one first in the neighbor + // list. + for ( auto& n : _neighbors ) + if ( n == my_rank ) + { + std::swap( n, _neighbors[0] ); + break; + } + + // Initialize import/export sizes. + _num_export.assign( num_n, 0 ); + _num_import.assign( num_n, 0 ); + + // Copy the topology to the device. + Kokkos::View + topology_host( _neighbors.data(), num_n ); + auto topology = Kokkos::create_mirror_view_and_copy( + memory_space(), topology_host ); + + // Count the number of sends to each neighbor. + Kokkos::View + num_export_host( _num_export.data(), num_n ); + auto export_counts = Kokkos::create_mirror_view_and_copy( + memory_space(), num_export_host ); + auto count_neighbor_func = + KOKKOS_LAMBDA( const int i ) + { + for ( int n = 0; n < num_n; ++n ) + if ( topology(n) == element_export_ranks(i) ) + Kokkos::atomic_increment( &export_counts(n) ); + }; + Kokkos::RangePolicy count_neighbor_policy( + 0, _num_export_element ); + Kokkos::parallel_for( "Cabana::CommunicationPlan::count_neighbors", + count_neighbor_policy, + count_neighbor_func ); + Kokkos::fence(); + + // Copy counts back to the host. + Kokkos::deep_copy( num_export_host, export_counts ); + + // Post receives for the number of imports we will get. + std::vector requests; + requests.reserve( num_n ); + for ( int n = 0; n < num_n; ++n ) + if ( my_rank != _neighbors[n] ) + { + requests.push_back( MPI_Request() ); + MPI_Irecv( &_num_import[n], + 1, + MPI_UNSIGNED_LONG, + _neighbors[n], + mpi_tag, + _comm, + &(requests.back()) ); + } + else + _num_import[n] = _num_export[n]; + + // Send the number of exports to each of our neighbors. + for ( int n = 0; n < num_n; ++n ) + if ( my_rank != _neighbors[n] ) + MPI_Send( &_num_export[n], + 1, + MPI_UNSIGNED_LONG, + _neighbors[n], + mpi_tag, + _comm ); + + // Wait on receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Get the total number of imports/exports. + _total_num_export = + std::accumulate( _num_export.begin(), _num_export.end(), 0 ); + _total_num_import = + std::accumulate( _num_import.begin(), _num_import.end(), 0 ); + + // Barrier before continuing to ensure synchronization. + MPI_Barrier( _comm ); + } + + /*! + \brief Export rank creator. Use this when you don't know who you will + receiving from - only who you are sending to. This is less efficient + than if we already knew who our neighbors were because we have to + determine the topology of the point-to-point communication first. + + \param element_export_ranks The destination rank in the target + decomposition of each locally owned element in the source + decomposition. Each element will have one unique destination to which it + will be exported. This export rank may any one of the listed neighbor + ranks which can include the calling rank. 
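A short sketch of the topology-aware creator implemented just above, reusing the hypothetical plan, export_ranks, and my_rank names from the sketch near the top of this file; here the only neighbor is the calling rank itself.

    // The communication partners are already known, so pass the neighbor
    // list and skip the topology discovery done by the export-only creator.
    std::vector<int> neighbor_ranks( 1, my_rank );
    plan.createFromExportsAndTopology( export_ranks, neighbor_ranks );
    plan.createExportSteering( export_ranks );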
An export rank of -1 will + signal that this element is *not* to be exported and will be ignored in + the data migration. The input is expected to be a Kokkos view or Cabana + slice in the same memory space as the communication plan. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \note Calling this function completely updates the state of this object + and invalidates the previous state. + + \note For elements that you do not wish to export, use an export rank of + -1 to signal that this element is *not* to be exported and will be + ignored in the data migration. In other words, this element will be + *completely* removed in the new decomposition. If the data is staying on + this rank, just use this rank as the export destination and the data + will be efficiently migrated. + */ + template + void createFromExportsOnly( const ViewType& element_export_ranks, + const int mpi_tag = 1221 ) + { + // Store the number of export elements. + _num_export_element = element_export_ranks.size(); + + // Get the size of this communicator. + int comm_size = -1; + MPI_Comm_size( _comm, &comm_size ); + + // Get the MPI rank we are currently on. + int my_rank = -1; + MPI_Comm_rank( _comm, &my_rank ); + + // Count the number of sends this rank will do to other ranks. + Kokkos::View neighbor_counts( + "neighbor_counts", comm_size ); + auto count_sends_func = + KOKKOS_LAMBDA( const int i ) + { + if ( element_export_ranks(i) >= 0 ) + Kokkos::atomic_increment( + &neighbor_counts(element_export_ranks(i)) ); + }; + Kokkos::RangePolicy count_sends_policy( + 0, _num_export_element ); + Kokkos::parallel_for( "Cabana::CommunicationPlan::count_sends", + count_sends_policy, + count_sends_func ); + Kokkos::fence(); + + // Copy the counts to the host. + auto neighbor_counts_host = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), neighbor_counts ); + + // Extract the export ranks and number of exports and then flag the + // send ranks. + _neighbors.clear(); + _num_export.clear(); + _total_num_export = 0; + for ( int r = 0; r < comm_size; ++r ) + if ( neighbor_counts_host(r) > 0 ) + { + _neighbors.push_back( r ); + _num_export.push_back( neighbor_counts_host(r) ); + _total_num_export += neighbor_counts_host(r); + neighbor_counts_host(r) = 1; + } + + // Get the number of export ranks and initially allocate the import sizes. + int num_export_rank = _neighbors.size(); + _num_import.assign( num_export_rank, 0 ); + + // If we are sending to ourself put that one first in the neighbor + // list and assign the number of imports to be the number of exports. + bool self_send = false; + for ( int n = 0; n < num_export_rank; ++n ) + if ( _neighbors[n] == my_rank ) + { + std::swap( _neighbors[n], _neighbors[0] ); + std::swap( _num_export[n], _num_export[0] ); + _num_import[0] = _num_export[0]; + self_send = true; + break; + } + + // Determine how many total import ranks each neighbor has. + std::vector total_receives( comm_size ); + int root_rank = 0; + MPI_Reduce( neighbor_counts_host.data(), + total_receives.data(), + comm_size, + MPI_INT, + MPI_SUM, + root_rank, + _comm ); + int num_import_rank = -1; + MPI_Scatter( total_receives.data(), + 1, + MPI_INT, + &num_import_rank, + 1, + MPI_INT, + root_rank, + _comm ); + if ( self_send ) --num_import_rank; + + // Post the expected number of receives and indicate we might get them + // from any rank. 
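The MPI_Reduce/MPI_Scatter pair a few lines above is what determines, with no prior topology information, how many messages this rank should expect; stripped down to just that pattern (comm, comm_size, and the flag vector are stand-ins for the member data used above) it looks like the following.

    // Flag every rank this rank will send to with a 1.
    std::vector<int> send_flags( comm_size, 0 );
    // ... send_flags[r] = 1 for each destination rank r ...

    // Summing the flags element-wise on a root rank counts, for each rank r,
    // how many ranks will send to r; scattering then hands every rank its
    // own entry of that result.
    std::vector<int> num_senders( comm_size, 0 );
    MPI_Reduce( send_flags.data(), num_senders.data(), comm_size,
                MPI_INT, MPI_SUM, 0, comm );
    int my_num_senders = -1;
    MPI_Scatter( num_senders.data(), 1, MPI_INT,
                 &my_num_senders, 1, MPI_INT, 0, comm );
    // my_num_senders is the number of size messages to post receives for
    // (less one when this rank also flagged itself, as handled above).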
+ std::vector import_sizes( num_import_rank ); + std::vector requests( num_import_rank ); + for ( int n = 0; n < num_import_rank; ++n ) + MPI_Irecv( &import_sizes[n], + 1, + MPI_UNSIGNED_LONG, + MPI_ANY_SOURCE, + mpi_tag, + _comm, + &requests[n] ); + + // Do blocking sends. Dont do any self sends. + int self_offset = (self_send) ? 1 : 0; + for ( int n = self_offset; n < num_export_rank; ++n ) + MPI_Send( &_num_export[n], + 1, + MPI_UNSIGNED_LONG, + _neighbors[n], + mpi_tag, + _comm ); + + // Wait on non-blocking receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Compute the total number of imports. + _total_num_import = std::accumulate( + import_sizes.begin(), import_sizes.end(), + (self_send) ? _num_import[0] : 0 ); + + // Extract the imports. If we did self sends we already know what + // imports we got from that. + for ( int i = 0; i < num_import_rank; ++i ) + { + // Get the message source. + const auto source = status[i].MPI_SOURCE; + + // See if the neighbor we received stuff from was someone we also + // sent stuff to. If it was, just record what they sent us. + auto found_neighbor = + std::find( _neighbors.begin(), _neighbors.end(), source ); + + // If this is a new neighbor (i.e. someone we didn't send anything + // to) record this. Otherwise add it to the one we found. + if ( found_neighbor == std::end(_neighbors) ) + { + _neighbors.push_back( source ); + _num_import.push_back( import_sizes[i] ); + _num_export.push_back( 0 ); + } + else + { + _num_import[i+self_offset] = import_sizes[i]; + } + } + + // Barrier before continuing to ensure synchronization. + MPI_Barrier( _comm ); + } + + /*! + \brief Create the export steering vector. + + Creates an array describing which export element ids are moved to which + location in the send buffer of the communcation plan. Ordered such that + if a rank sends to itself then those values come first. + + \param element_export_ranks The ranks to which we are exporting each + element. We use this to build the steering vector. The input is expected + to be a Kokkos view or Cabana slice in the same memory space as the + communication plan. + */ + template + void createExportSteering( const ViewType& element_export_ranks ) + { + // passing in element_export_ranks here as a dummy argument. + createSteering( true, element_export_ranks, element_export_ranks ); + } + + /*! + \brief Create the export steering vector. + + Creates an array describing which export element ids are moved to which + location in the contiguous send buffer of the communcation plan. Ordered + such that if a rank sends to itself then those values come first. + + \param element_export_ranks The ranks to which we are exporting each + element. We use this to build the steering vector. The input is expected + to be a Kokkos view or Cabana slice in the same memory space as the + communication plan. + + \param element_export_ids The local ids of the elements to be + exported. This corresponds with the export ranks vector and must be the + same length if defined. The input is expected to be a Kokkos view or + Cabana slice in the same memory space as the communication plan. + */ + template + void createExportSteering( + const RankViewType& element_export_ranks, + const IdViewType& element_export_ids ) + { + createSteering( false, element_export_ranks, element_export_ids ); + } + + // Create the export steering vector. 
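Before the shared helper below, a brief sketch of the two public overloads documented above, again reusing the hypothetical plan and export_ranks names from the sketch at the top of this file; the id view is likewise invented for illustration.

    // One-argument form: export element i is local element id i.
    plan.createExportSteering( export_ranks );

    // Two-argument form: export element i is local element export_ids(i).
    // Both views must have the same length.
    Kokkos::View<std::size_t*,Kokkos::HostSpace> export_ids(
        "export_ids", export_ranks.size() );
    // ... fill export_ids with the local ids being exported ...
    plan.createExportSteering( export_ranks, export_ids );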
+ template + void createSteering( + const bool use_iota, + const RankViewType& element_export_ranks, + const IdViewType& element_export_ids ) + { + if ( !use_iota && + (element_export_ids.size() != element_export_ranks.size()) ) + throw std::runtime_error("Export ids and ranks different sizes!"); + + // Calculate the steering offsets via exclusive prefix sum for the + // exports. + int num_n = _neighbors.size(); + Kokkos::View + offsets_host( "offsets", num_n ); + for ( int n = 1; n < num_n; ++n ) + offsets_host(n) = offsets_host(n-1) + _num_export[n-1]; + + // Copy the offsets to the device. + auto offsets = Kokkos::create_mirror_view_and_copy( + memory_space(), offsets_host ); + + // Copy the neighbors to the device. + Kokkos::View + neighbor_ranks_host( _neighbors.data(), num_n ); + auto neighbor_ranks = Kokkos::create_mirror_view_and_copy( + memory_space(), neighbor_ranks_host ); + + // Create the export steering vector for writing local elements into + // the send buffer. Note we create a local, shallow copy - this is a + // CUDA workaround. + _export_steering = Kokkos::View( + Kokkos::ViewAllocateWithoutInitializing("export_steering"), + _total_num_export ); + auto steer_vec = _export_steering; + Kokkos::View counts( "counts", num_n ); + auto steer_func = + KOKKOS_LAMBDA( const int i ) + { + for ( int n = 0; n < num_n; ++n ) + if ( element_export_ranks(i) == neighbor_ranks(n) ) + { + auto c = Kokkos::atomic_fetch_add( &counts(n), 1 ); + steer_vec( offsets(n) + c ) = + (use_iota) ? i : element_export_ids(i); + break; + } + }; + Kokkos::RangePolicy steer_policy( + 0, element_export_ranks.size() ); + Kokkos::parallel_for( "Cabana::createSteering", + steer_policy, + steer_func ); + Kokkos::fence(); + } + + private: + + MPI_Comm _comm; + std::vector _neighbors; + std::size_t _total_num_export; + std::size_t _total_num_import; + std::vector _num_export; + std::vector _num_import; + std::size_t _num_export_element; + Kokkos::View _export_steering; +}; + +//---------------------------------------------------------------------------// + +} // end namespace Cabana + +#endif // end CABANA_COMMUNICATIONPLAN_HPP diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 7237a8193..9ff442914 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -22,7 +22,7 @@ foreach(_test Version Index CartesianGrid SoA) endforeach() ##--------------------------------------------------------------------------## -## Tests. +## On-node tests. ##--------------------------------------------------------------------------## foreach(_device ${CABANA_SUPPORTED_DEVICES}) if(Cabana_ENABLE_${_device}) @@ -40,3 +40,21 @@ foreach(_device ${CABANA_SUPPORTED_DEVICES}) endforeach() endif() endforeach() + +##--------------------------------------------------------------------------## +## MPI tests. 
+##--------------------------------------------------------------------------## +if(${Cabana_ENABLE_MPI}) + foreach(_device ${CABANA_SUPPORTED_DEVICES}) + if(Cabana_ENABLE_${_device}) + foreach(_test CommunicationPlan Distributor Halo) + add_executable(${_test}_test_${_device} ${_device}/tst${_test}_${_device}.cpp mpi_unit_test_main.cpp) + target_link_libraries(${_test}_test_${_device} cabanacore cabana_core_gtest) + foreach(_np 1 ${MPIEXEC_MAX_NUMPROCS}) + add_test(NAME ${_test}_test_${_device}_${_np} COMMAND + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${_np} ${_test}_test_${_device} --gtest_color=yes) + endforeach() + endforeach() + endif() + endforeach() +endif() diff --git a/core/unit_test/Cuda/tstCommunicationPlan_Cuda.cpp b/core/unit_test/Cuda/tstCommunicationPlan_Cuda.cpp new file mode 100644 index 000000000..5961ea2a6 --- /dev/null +++ b/core/unit_test/Cuda/tstCommunicationPlan_Cuda.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/OpenMP/tstCommunicationPlan_OpenMP.cpp b/core/unit_test/OpenMP/tstCommunicationPlan_OpenMP.cpp new file mode 100644 index 000000000..dc5e28f99 --- /dev/null +++ b/core/unit_test/OpenMP/tstCommunicationPlan_OpenMP.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Pthread/tstCommunicationPlan_Pthread.cpp b/core/unit_test/Pthread/tstCommunicationPlan_Pthread.cpp new file mode 100644 index 000000000..178f0c9de --- /dev/null +++ b/core/unit_test/Pthread/tstCommunicationPlan_Pthread.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Serial/tstCommunicationPlan_Serial.cpp b/core/unit_test/Serial/tstCommunicationPlan_Serial.cpp new file mode 100644 index 000000000..47728992d --- /dev/null +++ b/core/unit_test/Serial/tstCommunicationPlan_Serial.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/tstCommunicationPlan.hpp b/core/unit_test/tstCommunicationPlan.hpp new file mode 100644 index 000000000..ec284aade --- /dev/null +++ b/core/unit_test/tstCommunicationPlan.hpp @@ -0,0 +1,535 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include + +#include + +#include + +#include + +#include +#include + +namespace Test +{ +//---------------------------------------------------------------------------// +class CommPlanTester : public Cabana::CommunicationPlan +{ + public: + + CommPlanTester( MPI_Comm comm ) + : Cabana::CommunicationPlan(comm) + {} + + template + void createFromExportsAndNeighbors( + const ViewType& element_export_ranks, + const std::vector& neighbor_ranks ) + { + this->createFromExportsAndTopology( + element_export_ranks, neighbor_ranks ); + } + + template + void createFromExports( const ViewType& element_export_ranks ) + { + this->createFromExportsOnly( element_export_ranks ); + } + + template + void createSteering( const ViewType& element_export_ranks ) + { + this->createExportSteering( element_export_ranks ); + } + + template + void createSteering( + const RankViewType& element_export_ranks, + const IdViewType& element_export_ids ) + { + this->createExportSteering( + element_export_ranks, element_export_ids ); + } +}; + +//---------------------------------------------------------------------------// +void test1( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Every rank will communicate with itself and send all of its data. + int num_data = 10; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, my_rank ); + std::vector neighbor_ranks( 1, my_rank ); + + // Create the plan. + if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. + EXPECT_EQ( comm_plan.numNeighbor(), 1 ); + EXPECT_EQ( comm_plan.neighborRank(0), my_rank ); + EXPECT_EQ( comm_plan.numExport(0), num_data ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data ); + EXPECT_EQ( comm_plan.numImport(0), num_data ); + EXPECT_EQ( comm_plan.totalNumImport(), num_data ); + + // Create the export steering vector. + comm_plan.createSteering( export_ranks ); + + // Check the steering vector. We thread the creation of the steering + // vector so we don't really know what order it is in - only that it is + // grouped by the ranks to which we are exporting. In this case just sort + // the steering vector and make sure all of the ids are there. We can do + // this because we are only sending to one rank. 
+ auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + std::sort( host_steering.data(), + host_steering.data() + host_steering.size() ); + EXPECT_EQ( host_steering.size(), num_data ); + for ( int n = 0; n < num_data; ++n ) + EXPECT_EQ( n, host_steering(n) ); +} + +//---------------------------------------------------------------------------// +void test2( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Every rank will communicate with itself and send every other piece of + // data. + int num_data = 10; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + for ( int n = 0; n < num_data; ++n ) + export_ranks_host(n) = ( 0 == n%2 ) ? my_rank : -1; + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + std::vector neighbor_ranks( 1, my_rank ); + + // Create the plan + if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. + EXPECT_EQ( comm_plan.numNeighbor(), 1 ); + EXPECT_EQ( comm_plan.neighborRank(0), my_rank ); + EXPECT_EQ( comm_plan.numExport(0), num_data / 2 ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data / 2 ); + EXPECT_EQ( comm_plan.numImport(0), num_data / 2); + EXPECT_EQ( comm_plan.totalNumImport(), num_data / 2 ); + + // Create the export steering vector. + Kokkos::View + export_ids_host( "export_ids", export_ranks.size() ); + std::iota( export_ids_host.data(), + export_ids_host.data() + export_ranks.size(), + 0 ); + auto element_export_ids = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ids_host ); + comm_plan.createSteering( export_ranks, element_export_ids ); + + // Check the steering vector. We thread the creation of the steering + // vector so we don't really know what order it is in - only that it is + // grouped by the ranks to which we are exporting. In this case just sort + // the steering vector and make sure all of the ids are there. We can do + // this because we are only sending to one rank. + auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + std::sort( host_steering.data(), + host_steering.data() + host_steering.size() ); + EXPECT_EQ( host_steering.size(), num_data / 2 ); + for ( int n = 0; n < num_data / 2; ++n ) + EXPECT_EQ( n * 2, host_steering(n) ); +} + +//---------------------------------------------------------------------------// +void test3( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Compute the inverse rank. + int inverse_rank = my_size - my_rank - 1; + + // Every rank will communicate with the rank that is its inverse. + int num_data = 10; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, inverse_rank ); + std::vector neighbor_ranks( 1, inverse_rank ); + + // Create the plan with both export ranks and the topology. 
+ if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. + EXPECT_EQ( comm_plan.numNeighbor(), 1 ); + EXPECT_EQ( comm_plan.neighborRank(0), inverse_rank ); + EXPECT_EQ( comm_plan.numExport(0), num_data ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data ); + EXPECT_EQ( comm_plan.numImport(0), num_data ); + EXPECT_EQ( comm_plan.totalNumImport(), num_data ); + + // Create the export steering vector. + comm_plan.createSteering( export_ranks ); + + // Check the steering vector. We thread the creation of the steering + // vector so we don't really know what order it is in - only that it is + // grouped by the ranks to which we are exporting. In this case just sort + // the steering vector and make sure all of the ids are there. We can do + // this because we are only sending to one rank. + auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + std::sort( host_steering.data(), + host_steering.data() + host_steering.size() ); + for ( int n = 0; n < num_data; ++n ) + EXPECT_EQ( n, host_steering(n) ); +} + +//---------------------------------------------------------------------------// +void test4( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will communicate with all other ranks. Interleave the sends. + int num_data = 2 * my_size; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + std::vector neighbor_ranks( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + export_ranks_host[n] = n; + export_ranks_host[n + my_size] = n; + neighbor_ranks[n] = n; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + + // Create the plan + if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. Note that if we are sending to ourselves (which we are) + // that then that data is listed as the first neighbor. + EXPECT_EQ( comm_plan.numNeighbor(), my_size ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data ); + EXPECT_EQ( comm_plan.totalNumImport(), num_data ); + + // self send + EXPECT_EQ( comm_plan.neighborRank(0), my_rank ); + EXPECT_EQ( comm_plan.numExport(0), 2 ); + EXPECT_EQ( comm_plan.numImport(0), 2 ); + + // others + for ( int n = 1; n < my_size; ++n ) + { + // the algorithm will swap this rank and the first one. + if ( n == my_rank ) + EXPECT_EQ( comm_plan.neighborRank(n), 0 ); + else + EXPECT_EQ( comm_plan.neighborRank(n), n ); + + EXPECT_EQ( comm_plan.numExport(n), 2 ); + EXPECT_EQ( comm_plan.numImport(n), 2 ); + } + + // Create the export steering vector. + comm_plan.createSteering( export_ranks ); + + // Check the steering vector. The algorithm will pack the ids according to + // send rank and self sends will appear first. + auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + EXPECT_EQ( host_steering.size(), num_data ); + + // self sends - we don't know which order the self sends are in but we + // know they are the first 2. 
+ if ( my_rank == host_steering(0) ) + { + EXPECT_EQ( host_steering(0), my_rank ); + EXPECT_EQ( host_steering(1), my_rank + my_size ); + } + else + { + EXPECT_EQ( host_steering(1), my_rank ); + EXPECT_EQ( host_steering(0), my_rank + my_size ); + } + + // others. again, we don't know which order the vector was made in but we + // do know they are grouped by the rank to which we are sending and we + // know how those ranks are ordered. + for ( int n = 1; n < my_size; ++n ) + { + if ( n == my_rank ) + { + if ( 0 == host_steering(2*n) ) + { + EXPECT_EQ( host_steering(2*n), 0 ); + EXPECT_EQ( host_steering(2*n+1), my_size ); + } + else + { + EXPECT_EQ( host_steering(2*n+1), 0 ); + EXPECT_EQ( host_steering(2*n), my_size ); + } + } + else + { + if ( n == host_steering(2*n) ) + { + EXPECT_EQ( host_steering(2*n), n ); + EXPECT_EQ( host_steering(2*n+1), n + my_size ); + } + else + { + EXPECT_EQ( host_steering(2*n+1), n ); + EXPECT_EQ( host_steering(2*n), n + my_size ); + } + } + } +} + +//---------------------------------------------------------------------------// +void test5( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will communicate with all other ranks. Interleave the sends + // and only send every other value. + int num_data = 2 * my_size; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + std::vector neighbor_ranks( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + export_ranks_host[n] = -1; + export_ranks_host[n + my_size] = n; + neighbor_ranks[n] = n; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + + // Create the plan + if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. Note that if we are sending to ourselves (which we are) + // that then that data is listed as the first neighbor. + EXPECT_EQ( comm_plan.numNeighbor(), my_size ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data / 2); + EXPECT_EQ( comm_plan.totalNumImport(), num_data / 2); + + // self send + EXPECT_EQ( comm_plan.neighborRank(0), my_rank ); + EXPECT_EQ( comm_plan.numExport(0), 1 ); + EXPECT_EQ( comm_plan.numImport(0), 1 ); + + // others + for ( int n = 1; n < my_size; ++n ) + { + // the algorithm will swap this rank and the first one. + if ( n == my_rank ) + EXPECT_EQ( comm_plan.neighborRank(n), 0 ); + else + EXPECT_EQ( comm_plan.neighborRank(n), n ); + + EXPECT_EQ( comm_plan.numExport(n), 1 ); + EXPECT_EQ( comm_plan.numImport(n), 1 ); + } + + // Create the export steering vector. + comm_plan.createSteering( export_ranks ); + + // Check the steering vector. The algorithm will pack the ids according to + // send rank and self sends will appear first. 
+ auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + EXPECT_EQ( host_steering.size(), my_size ); + + // self sends + EXPECT_EQ( host_steering(0), my_rank + my_size ); + + // others + for ( int n = 1; n < my_size; ++n ) + { + if ( n == my_rank ) + EXPECT_EQ( host_steering(n), my_size ); + else + EXPECT_EQ( host_steering(n), n + my_size ); + } +} + +//---------------------------------------------------------------------------// +void test6( const bool use_topology ) +{ + // Make a communication plan. + CommPlanTester comm_plan( MPI_COMM_WORLD ); + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get the comm size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every has one element and will send that element to rank 0. + int num_data = 1; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, 0 ); + std::vector neighbor_ranks; + if ( 0 == my_rank ) + { + neighbor_ranks.resize( my_size ); + std::iota( neighbor_ranks.begin(), neighbor_ranks.end(), 0 ); + } + else + { + neighbor_ranks.assign( 1, 0 ); + } + + // Create the plan. + if ( use_topology ) + comm_plan.createFromExportsAndNeighbors( export_ranks, neighbor_ranks ); + else + comm_plan.createFromExports( export_ranks ); + + // Check the plan. + if ( 0 == my_rank ) + { + EXPECT_EQ( comm_plan.numNeighbor(), my_size ); + EXPECT_EQ( comm_plan.numImport(0), 1 ); + EXPECT_EQ( comm_plan.totalNumImport(), my_size ); + } + else + { + EXPECT_EQ( comm_plan.numNeighbor(), 1 ); + EXPECT_EQ( comm_plan.neighborRank(0), 0 ); + EXPECT_EQ( comm_plan.numImport(0), 0 ); + EXPECT_EQ( comm_plan.totalNumImport(), 0 ); + } + EXPECT_EQ( comm_plan.numExport(0), num_data ); + EXPECT_EQ( comm_plan.totalNumExport(), num_data ); + + // Create the export steering vector. + comm_plan.createSteering( export_ranks ); + + // Check the steering vector. We thread the creation of the steering + // vector so we don't really know what order it is in - only that it is + // grouped by the ranks to which we are exporting. In this case just sort + // the steering vector and make sure all of the ids are there. We can do + // this because we are only sending to one rank. 
+ auto steering = comm_plan.getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + EXPECT_EQ( host_steering.size(), num_data ); + EXPECT_EQ( 0, host_steering(0) ); +} + +//---------------------------------------------------------------------------// +// RUN TESTS +//---------------------------------------------------------------------------// +TEST_F( TEST_CATEGORY, comm_plan_test_1 ) +{ test1(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_2 ) +{ test2(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_3 ) +{ test3(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_4 ) +{ test4(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_5 ) +{ test5(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_6 ) +{ test6(true); } + +TEST_F( TEST_CATEGORY, comm_plan_test_1_no_topo ) +{ test1(false); } + +TEST_F( TEST_CATEGORY, comm_plan_test_2_no_topo ) +{ test2(false); } + +TEST_F( TEST_CATEGORY, comm_plan_test_3_no_topo ) +{ test3(false); } + +TEST_F( TEST_CATEGORY, comm_plan_test_4_no_topo ) +{ test4(false); } + +TEST_F( TEST_CATEGORY, comm_plan_test_5_no_topo ) +{ test5(false); } + +TEST_F( TEST_CATEGORY, comm_plan_test_6_no_topo ) +{ test6(false); } + +//---------------------------------------------------------------------------// + +} // end namespace Test From 77cd5e8353f86d0eefbefda045be7a607a47946c Mon Sep 17 00:00:00 2001 From: Stuart Slattery Date: Fri, 21 Dec 2018 12:47:59 -0500 Subject: [PATCH 2/3] adding distributor communication plan for data migration Co-authored-by: dalg24 --- core/src/Cabana_Distributor.hpp | 610 ++++++++++++++++ core/unit_test/Cuda/tstDistributor_Cuda.cpp | 13 + .../OpenMP/tstDistributor_OpenMP.cpp | 13 + .../Pthread/tstDistributor_Pthread.cpp | 13 + .../Serial/tstDistributor_Serial.cpp | 13 + core/unit_test/tstDistributor.hpp | 676 ++++++++++++++++++ 6 files changed, 1338 insertions(+) create mode 100644 core/src/Cabana_Distributor.hpp create mode 100644 core/unit_test/Cuda/tstDistributor_Cuda.cpp create mode 100644 core/unit_test/OpenMP/tstDistributor_OpenMP.cpp create mode 100644 core/unit_test/Pthread/tstDistributor_Pthread.cpp create mode 100644 core/unit_test/Serial/tstDistributor_Serial.cpp create mode 100644 core/unit_test/tstDistributor.hpp diff --git a/core/src/Cabana_Distributor.hpp b/core/src/Cabana_Distributor.hpp new file mode 100644 index 000000000..25e82d9bb --- /dev/null +++ b/core/src/Cabana_Distributor.hpp @@ -0,0 +1,610 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef CABANA_DISTRIBUTOR_HPP +#define CABANA_DISTRIBUTOR_HPP + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +namespace Cabana +{ +//---------------------------------------------------------------------------// +/*! + \class Distributor + + \brief Distributor is a communication plan for migrating data from one + uniquely-owned decomposition to another uniquely owned decomposition. + + \tparam MemorySpace Memory space in which the data for this class will be + allocated. + + The Distributor allows data to be migrated to an entirely new + decomposition. 
Only uniquely-owned decompositions are handled (i.e. each + local element in the source rank has a single unique destination rank). + + Some nomenclature: + + Export - the data we uniquely own that we will be sending to other ranks. + + Import - the data we uniquely own that we will be receiving from other + ranks. + + \note We can migrate data to the same rank. In this case a copy will occur + instead of communication. + + \note To get the number of elements this rank will be receiving from + migration in the forward communication plan, call totalNumImport() on the + distributor. This will be needed when in-place migration is not used and a + user must allocate their own destination data structure. + +*/ +template +class Distributor : public CommunicationPlan +{ + public: + + /*! + \brief Topology and export rank constructor. Use this when you already + know which ranks neighbor each other (i.e. every rank already knows who + they will be sending and receiving from) as it will be more + efficient. In this case you already know the topology of the + point-to-point communication but not how much data to send to and + receive from the neighbors. + + \tparam ViewType The container type for the export element ranks. This + container type can be either a Kokkos View or a Cabana Slice. + + \param comm The MPI communicator over which the distributor is defined. + + \param element_export_ranks The destination rank in the target + decomposition of each locally owned element in the source + decomposition. Each element will have one unique destination to which it + will be exported. This export rank may be any one of the listed neighbor + ranks which can include the calling rank. An export rank of -1 will + signal that this element is *not* to be exported and will be ignored in + the data migration. The input is expected to be a Kokkos view or Cabana + slice in the same memory space as the distributor. + + \param neighbor_ranks List of ranks this rank will send to and receive + from. This list can include the calling rank. This is effectively a + description of the topology of the point-to-point communication plan. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \note For elements that you do not wish to export, use an export rank of + -1 to signal that this element is *not* to be exported and will be + ignored in the data migration. In other words, this element will be + *completely* removed in the new decomposition. If the data is staying on + this rank, just use this rank as the export destination and the data + will be efficiently migrated. + */ + template + Distributor( MPI_Comm comm, + const ViewType& element_export_ranks, + const std::vector& neighbor_ranks, + const int mpi_tag = 1221 ) + : CommunicationPlan( comm ) + { + this->createFromExportsAndTopology( + element_export_ranks, neighbor_ranks, mpi_tag ); + this->createExportSteering( element_export_ranks ); + } + + /*! + \brief Export rank constructor. Use this when you don't know who you + will be receiving from - only who you are sending to. This is less + efficient than if we already knew who our neighbors were because we have + to determine the topology of the point-to-point communication first. + + \tparam ViewType The container type for the export element ranks. This + container type can be either a Kokkos View or a Cabana Slice. + + \param comm The MPI communicator over which the distributor is defined. 
+ + \param element_export_ranks The destination rank in the target + decomposition of each locally owned element in the source + decomposition. Each element will have one unique destination to which it + will be exported. This export rank may any one of the listed neighbor + ranks which can include the calling rank. An export rank of -1 will + signal that this element is *not* to be exported and will be ignored in + the data migration. The input is expected to be a Kokkos view or Cabana + slice in the same memory space as the distributor. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \note For elements that you do not wish to export, use an export rank of + -1 to signal that this element is *not* to be exported and will be + ignored in the data migration. In other words, this element will be + *completely* removed in the new decomposition. If the data is staying on + this rank, just use this rank as the export destination and the data + will be efficiently migrated. + */ + template + Distributor( MPI_Comm comm, + const ViewType& element_export_ranks, + const int mpi_tag = 1221 ) + : CommunicationPlan( comm ) + { + this->createFromExportsOnly( element_export_ranks, mpi_tag ); + this->createExportSteering( element_export_ranks ); + } +}; + +//---------------------------------------------------------------------------// +// Static type checker. +template +struct is_distributor : public std::false_type {}; + +template +struct is_distributor > + : public std::true_type {}; + +template +struct is_distributor > + : public std::true_type {}; + +//---------------------------------------------------------------------------// +namespace Impl +{ + +//---------------------------------------------------------------------------// +// Synchronously move data between a source and destination AoSoA by executing +// the forward communication plan. +template +void distributeData( + const Distributor_t& distributor, + const AoSoA_t& src, + AoSoA_t& dst, + int mpi_tag, + typename std::enable_if<(is_distributor::value && + is_aosoa::value), + int>::type * = 0 ) +{ + // Get the MPI rank we are currently on. + int my_rank = -1; + MPI_Comm_rank( distributor.comm(), &my_rank ); + + // Get the number of neighbors. + int num_n = distributor.numNeighbor(); + + // Calculate the number of elements that are staying on this rank and + // therefore can be directly copied. If any of the neighbor ranks are this + // rank it will be stored in first position (i.e. the first neighbor in + // the local list is always yourself if you are sending to yourself). + std::size_t num_stay = ( distributor.neighborRank(0) == my_rank ) + ? distributor.numExport(0) : 0; + + // Allocate a send buffer. + std::size_t num_send = distributor.totalNumExport() - num_stay; + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing("distributor_send_buffer"), + num_send ); + + // Allocate a receive buffer. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing("distributor_recv_buffer"), + distributor.totalNumImport() ); + + // Get the steering vector for the sends. + auto steering = distributor.getExportSteering(); + + // Gather the exports from the source AoSoA into the tuple-contiguous send + // buffer or the receive buffer if the data is staying. We know that the + // steering vector is ordered such that the data staying on this rank + // comes first. 
+ auto build_send_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + auto tpl = src.getTuple( steering(i) ); + if ( i < num_stay ) + recv_buffer( i ) = tpl; + else + send_buffer( i - num_stay ) = tpl; + }; + Kokkos::RangePolicy + build_send_buffer_policy( 0, distributor.totalNumExport() ); + Kokkos::parallel_for( "Cabana::Impl::distributeData::build_send_buffer", + build_send_buffer_policy, + build_send_buffer_func ); + Kokkos::fence(); + + // Post non-blocking receives. + std::vector requests; + requests.reserve( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + distributor.numImport(n); + + if ( (distributor.numImport(n) > 0) && + (distributor.neighborRank(n) != my_rank) ) + { + auto recv_subview = Kokkos::subview( recv_buffer, recv_range ); + + requests.push_back( MPI_Request() ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof(typename AoSoA_t::tuple_type), + MPI_BYTE, + distributor.neighborRank(n), + mpi_tag, + distributor.comm(), + &(requests.back()) ); + } + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + if ( (distributor.numExport(n) > 0) && + (distributor.neighborRank(n) != my_rank) ) + { + send_range.second = send_range.first + distributor.numExport(n); + + auto send_subview = Kokkos::subview( send_buffer, send_range ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof(typename AoSoA_t::tuple_type), + MPI_BYTE, + distributor.neighborRank(n), + mpi_tag, + distributor.comm() ); + + send_range.first = send_range.second; + } + } + + // Wait on non-blocking receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Extract the receive buffer into the destination AoSoA. + auto extract_recv_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { dst.setTuple( i, recv_buffer(i) ); }; + Kokkos::RangePolicy + extract_recv_buffer_policy( 0, distributor.totalNumImport() ); + Kokkos::parallel_for( "Cabana::Impl::distributeData::extract_recv_buffer", + extract_recv_buffer_policy, + extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( distributor.comm() ); +} + +//---------------------------------------------------------------------------// + +} // end namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously migrate data between two different decompositions using + the distributor forward communication plan. Multiple AoSoA version. + + Migrate moves all data to a new distribution that is uniquely owned - each + element will only have a single destination rank. + + \tparam Distributor_t Distributor type - must be a distributor. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \param distributor The distributor to use for the migration. + + \param src The AoSoA containing the data to be migrated. Must have the same + number of elements as the inputs used to construct the distributor. + + \param dst The AoSoA to which the migrated data will be written. Must be the + same size as the number of imports given by the distributor on this + rank. Call totalNumImport() on the distributor to get this size value. + + \param mpi_tag The MPI tag to use for non-blocking communication in the data + migration. 
Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void migrate( const Distributor_t& distributor, + const AoSoA_t& src, + AoSoA_t& dst, + int mpi_tag = 1001, + typename std::enable_if<(is_distributor::value && + is_aosoa::value), + int>::type * = 0 ) +{ + // Check that src and dst are the right size. + if ( src.size() != distributor.exportSize() ) + throw std::runtime_error("Source is the wrong size for migration!"); + if ( dst.size() != distributor.totalNumImport() ) + throw std::runtime_error("Destination is the wrong size for migration!"); + + // Move the data. + Impl::distributeData( distributor, src, dst, mpi_tag ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously migrate data between two different decompositions using + the distributor forward communication plan. Single AoSoA version that will + resize in-place. Note that resizing does not necessarily allocate more + memory. The AoSoA memory will only increase if not enough has already been + reserved/allocated for the needed number of elements. + + Migrate moves all data to a new distribution that is uniquely owned - each + element will only have a single destination rank. + + \tparam Distributor_t Distributor type - must be a distributor. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \param distributor The distributor to use for the migration. + + \param AoSoA The AoSoA containing the data to be migrated. Upon input, must + have the same number of elements as the inputs used to construct the + destributor. At output, it will be the same size as th enumber of import + elements on this rank provided by the distributor. Before using this + function, consider reserving enough memory in the data structure so + reallocating is not necessary. + + \param mpi_tag The MPI tag to use for non-blocking communication in the data + migration. Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void migrate( const Distributor_t& distributor, + AoSoA_t& aosoa, + int mpi_tag = 1001, + typename std::enable_if<(is_distributor::value && + is_aosoa::value), + int>::type * = 0 ) +{ + // Check that the AoSoA is the right size. + if ( aosoa.size() != distributor.exportSize() ) + throw std::runtime_error("AoSoA is the wrong size for migration!"); + + // Determine if the source of destination decomposition has more data on + // this rank. + bool dst_is_bigger = + ( distributor.totalNumImport() > distributor.exportSize() ); + + // If the destination decomposition is bigger than the source + // decomposition resize now so we have enough space to do the operation. + if ( dst_is_bigger ) + aosoa.resize( distributor.totalNumImport() ); + + // Move the data. + Impl::distributeData( distributor, aosoa, aosoa, mpi_tag ); + + // If the destination decomposition is smaller than the source + // decomposition resize after we have moved the data. + if ( !dst_is_bigger ) + aosoa.resize( distributor.totalNumImport() ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously migrate data between two different decompositions using + the distributor forward communication plan. Slice version. 
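To make the two AoSoA overloads above concrete, here is a minimal self-send sketch. It is illustrative only and not part of the patch: the Kokkos::HostSpace tag (standing in for the TEST_MEMSPACE macro used by the unit tests), the member types, and the element count are assumptions.

    #include <Cabana_Core.hpp>
    #include <Cabana_Distributor.hpp>
    #include <Kokkos_Core.hpp>
    #include <mpi.h>

    int main( int argc, char* argv[] )
    {
        MPI_Init( &argc, &argv );
        Kokkos::initialize( argc, argv );
        {
            int my_rank = -1;
            MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );

            // Assumed memory-space tag; the unit tests use TEST_MEMSPACE here.
            using MemSpace = Kokkos::HostSpace;

            // Every element is exported back to this rank (pure self-send).
            int num_data = 10;
            Kokkos::View<int*, MemSpace> export_ranks( "export_ranks", num_data );
            Kokkos::deep_copy( export_ranks, my_rank );
            Cabana::Distributor<MemSpace> distributor( MPI_COMM_WORLD, export_ranks );

            // Two-argument form: separate source and destination AoSoAs.
            using Types = Cabana::MemberTypes<int, double[2]>;
            Cabana::AoSoA<Types, MemSpace> src( num_data );
            Cabana::AoSoA<Types, MemSpace> dst( distributor.totalNumImport() );
            Cabana::migrate( distributor, src, dst );

            // One-argument form: migrate the same data in place, resizing if needed.
            Cabana::migrate( distributor, src );
        }
        Kokkos::finalize();
        MPI_Finalize();
        return 0;
    }

The two-argument form requires the caller to size the destination with totalNumImport(); the one-argument form resizes for you, which is why the second call needs no destination.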
The user can do + this in-place with the same slice but they will need to manage the resizing + themselves as we can't resize slices. + + Migrate moves all data to a new distribution that is uniquely owned - each + element will only have a single destination rank. + + \tparam Distributor_t Distributor type - must be a distributor. + + \tparam Slice_t Slice type - must be an Slice. + + \param distributor The distributor to use for the migration. + + \param src The slice containing the data to be migrated. Must have the same + number of elements as the inputs used to construct the destributor. + + \param dst The slice to which the migrated data will be written. Must be the + same size as the number of imports given by the distributor on this + rank. Call totalNumImport() on the distributor to get this size value. + + \param mpi_tag The MPI tag to use for non-blocking communication in the data + migration. Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void migrate( const Distributor_t& distributor, + const Slice_t& src, + Slice_t& dst, + int mpi_tag = 1001, + typename std::enable_if<(is_distributor::value && + is_slice::value), + int>::type * = 0 ) +{ + // Check that src and dst are the right size. + if ( src.size() != distributor.exportSize() ) + throw std::runtime_error("Source is the wrong size for migration!"); + if ( dst.size() != distributor.totalNumImport() ) + throw std::runtime_error("Destination is the wrong size for migration!"); + + // Get the number of components in the slices. + int num_comp = 1; + for ( int d = 2; d < src.rank(); ++d ) + num_comp *= src.extent(d); + + // Get the raw slice data. + auto src_data = src.data(); + auto dst_data = dst.data(); + + // Get the MPI rank we are currently on. + int my_rank = -1; + MPI_Comm_rank( distributor.comm(), &my_rank ); + + // Get the number of neighbors. + int num_n = distributor.numNeighbor(); + + // Calculate the number of elements that are staying on this rank and + // therefore can be directly copied. If any of the neighbor ranks are this + // rank it will be stored in first position (i.e. the first neighbor in + // the local list is always yourself if you are sending to yourself). + std::size_t num_stay = ( distributor.neighborRank(0) == my_rank ) + ? distributor.numExport(0) : 0; + + // Allocate a send buffer. Note this one is layout right so the components + // of each element are consecutive in memory. + std::size_t num_send = distributor.totalNumExport() - num_stay; + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing("distributor_send_buffer"), + num_send, num_comp ); + + // Allocate a receive buffer. Note this one is layout right so the components + // of each element are consecutive in memory. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing("distributor_recv_buffer"), + distributor.totalNumImport(), num_comp ); + + // Get the steering vector for the sends. + auto steering = distributor.getExportSteering(); + + // Gather from the source Slice into the contiguous send buffer or, + // if it is part of the local copy, put it directly in the destination + // Slice. 
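    // Layout reminder: the buffers are LayoutRight with one row per element
    // and one column per component, so each element's components are
    // contiguous and can be shipped as a single block of bytes. For a slice,
    // element steering(i) lives at struct index s and array index a, i.e. at
    // raw offset s*stride(0) + a, with component n a further
    // n*vector_length entries away.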
+ auto build_send_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + auto s_src = Slice_t::index_type::s( steering(i) ); + auto a_src = Slice_t::index_type::a( steering(i) ); + std::size_t src_offset = s_src*src.stride(0) + a_src; + if ( i < num_stay ) + for ( int n = 0; n < num_comp; ++n ) + recv_buffer( i, n ) = + src_data[ src_offset + n * Slice_t::vector_length ]; + else + for ( int n = 0; n < num_comp; ++n ) + send_buffer( i - num_stay, n ) = + src_data[ src_offset + n * Slice_t::vector_length ]; + }; + Kokkos::RangePolicy + build_send_buffer_policy( 0, distributor.totalNumExport() ); + Kokkos::parallel_for( "Cabana::migrate::build_send_buffer", + build_send_buffer_policy, + build_send_buffer_func ); + Kokkos::fence(); + + // Post non-blocking receives. + std::vector requests; + requests.reserve( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + distributor.numImport(n); + + if ( (distributor.numImport(n) > 0) && + (distributor.neighborRank(n) != my_rank) ) + { + auto recv_subview = Kokkos::subview( + recv_buffer, recv_range, Kokkos::ALL ); + + requests.push_back( MPI_Request() ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + distributor.neighborRank(n), + mpi_tag, + distributor.comm(), + &(requests.back()) ); + } + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + if ( (distributor.numExport(n) > 0) && + (distributor.neighborRank(n) != my_rank) ) + { + send_range.second = send_range.first + distributor.numExport(n); + + auto send_subview = Kokkos::subview( + send_buffer, send_range, Kokkos::ALL ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + distributor.neighborRank(n), + mpi_tag, + distributor.comm() ); + + send_range.first = send_range.second; + } + } + + // Wait on non-blocking receives. + std::vector status( requests.size() ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Extract the data from the receive buffer into the destination Slice. + auto extract_recv_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( i ); + auto a = Slice_t::index_type::a( i ); + std::size_t dst_offset = s*dst.stride(0) + a; + for ( int n = 0; n < num_comp; ++n ) + dst_data[ dst_offset + n * Slice_t::vector_length ] = + recv_buffer( i, n ); + }; + Kokkos::RangePolicy + extract_recv_buffer_policy( 0, distributor.totalNumImport() ); + Kokkos::parallel_for( "Cabana::migrate::extract_recv_buffer", + extract_recv_buffer_policy, + extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( distributor.comm() ); +} + +//---------------------------------------------------------------------------// + +} // end namespace Cabana + +#endif // end CABANA_DISTRIBUTOR_HPP diff --git a/core/unit_test/Cuda/tstDistributor_Cuda.cpp b/core/unit_test/Cuda/tstDistributor_Cuda.cpp new file mode 100644 index 000000000..bbc92c129 --- /dev/null +++ b/core/unit_test/Cuda/tstDistributor_Cuda.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. 
Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/OpenMP/tstDistributor_OpenMP.cpp b/core/unit_test/OpenMP/tstDistributor_OpenMP.cpp new file mode 100644 index 000000000..42285bd63 --- /dev/null +++ b/core/unit_test/OpenMP/tstDistributor_OpenMP.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Pthread/tstDistributor_Pthread.cpp b/core/unit_test/Pthread/tstDistributor_Pthread.cpp new file mode 100644 index 000000000..e74cc6309 --- /dev/null +++ b/core/unit_test/Pthread/tstDistributor_Pthread.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Serial/tstDistributor_Serial.cpp b/core/unit_test/Serial/tstDistributor_Serial.cpp new file mode 100644 index 000000000..b2cdb0609 --- /dev/null +++ b/core/unit_test/Serial/tstDistributor_Serial.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/tstDistributor.hpp b/core/unit_test/tstDistributor.hpp new file mode 100644 index 000000000..08798e2f8 --- /dev/null +++ b/core/unit_test/tstDistributor.hpp @@ -0,0 +1,676 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +void test1( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. 
+ int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Every rank will communicate with itself and send all of its data. + int num_data = 10; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, my_rank ); + std::vector neighbor_ranks( 1, my_rank ); + + // Create the plan. + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data_src( num_data ); + auto slice_int_src = data_src.slice<0>(); + auto slice_dbl_src = data_src.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int_src(i) = my_rank + i; + slice_dbl_src(i,0) = my_rank + i; + slice_dbl_src(i,1) = my_rank + i + 0.5; + }; + Kokkos::RangePolicy range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Create a second set of data to which we will migrate. + AoSoA_t data_dst( num_data ); + auto slice_int_dst = data_dst.slice<0>(); + auto slice_dbl_dst = data_dst.slice<1>(); + + // Do the migration + Cabana::migrate( *distributor, data_src, data_dst ); + + // Check the migration. + Cabana::AoSoA data_dst_host( num_data ); + auto slice_int_dst_host = data_dst_host.slice<0>(); + auto slice_dbl_dst_host = data_dst_host.slice<1>(); + Cabana::deep_copy( data_dst_host, data_dst ); + auto steering = distributor->getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + for ( int i = 0; i < num_data; ++i ) + { + EXPECT_EQ( slice_int_dst_host(i), my_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_dst_host(i,0), my_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_dst_host(i,1), my_rank + host_steering(i) + 0.5 ); + } +} + +//---------------------------------------------------------------------------// +void test2( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Every rank will communicate with itself and send every other piece of data. + int num_data = 10; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + for ( int n = 0; n < num_data; ++n ) + export_ranks_host(n) = ( 0 == n%2 ) ? my_rank : -1; + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + std::vector neighbor_ranks( 1, my_rank ); + + // Create the plan + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data( num_data ); + auto slice_int = data.slice<0>(); + auto slice_dbl = data.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int(i) = my_rank + i; + slice_dbl(i,0) = my_rank + i; + slice_dbl(i,1) = my_rank + i + 0.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Do the migration in-place + Cabana::migrate( *distributor, data ); + + // Get host copies of the migrated data. 
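    // Only the even-indexed elements had a valid export rank; the odd ones
    // were marked -1 and dropped, so the in-place migrate above shrank the
    // AoSoA from num_data to num_data/2 elements.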
+ Cabana::AoSoA data_host( num_data / 2 ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data ); + + // Check the migration. We received less than we sent so this should have + // resized the aososa. + auto steering = distributor->getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + EXPECT_EQ( data.size(), num_data / 2 ); + for ( int i = 0; i < num_data / 2; ++i ) + { + EXPECT_EQ( slice_int_host(i), my_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_host(i,0), my_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_host(i,1), my_rank + host_steering(i) + 0.5 ); + } +} + +//---------------------------------------------------------------------------// +void test3( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Compute the inverse rank. + int inverse_rank = my_size - my_rank - 1; + + // Every rank will communicate with the rank that is its inverse. + int num_data = 10; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, inverse_rank ); + std::vector neighbor_ranks( 1, inverse_rank ); + + // Create the plan with both export ranks and the topology. + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data_src( num_data ); + auto slice_int_src = data_src.slice<0>(); + auto slice_dbl_src = data_src.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int_src(i) = my_rank + i; + slice_dbl_src(i,0) = my_rank + i; + slice_dbl_src(i,1) = my_rank + i + 0.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Create a second set of data to which we will migrate. + AoSoA_t data_dst( num_data ); + auto slice_int_dst = data_dst.slice<0>(); + auto slice_dbl_dst = data_dst.slice<1>(); + + // Do the migration with slices + Cabana::migrate( *distributor, slice_int_src, slice_int_dst ); + Cabana::migrate( *distributor, slice_dbl_src, slice_dbl_dst ); + + // Exchange steering vectors with your inverse rank so we know what order + // they sent us stuff in. We thread the creation of the steering vector so + // its order is not deterministic. + auto my_steering = distributor->getExportSteering(); + Kokkos::View + inverse_steering( "inv_steering", distributor->totalNumImport() ); + int mpi_tag = 1030; + MPI_Request request; + MPI_Irecv( inverse_steering.data(), inverse_steering.size(), + MPI_UNSIGNED_LONG_LONG, inverse_rank, mpi_tag, + MPI_COMM_WORLD, &request ); + MPI_Send( my_steering.data(), my_steering.size(), + MPI_UNSIGNED_LONG_LONG, inverse_rank, mpi_tag, + MPI_COMM_WORLD ); + MPI_Status status; + MPI_Wait( &request, &status ); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), inverse_steering ); + + // Check the migration. 
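    // host_steering now holds the order in which the inverse rank packed its
    // exports, so received element i originated from its local element
    // host_steering(i) and should carry the value inverse_rank + host_steering(i).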
+ Cabana::AoSoA data_dst_host( num_data ); + Cabana::deep_copy( data_dst_host, data_dst ); + auto slice_int_dst_host = data_dst_host.slice<0>(); + auto slice_dbl_dst_host = data_dst_host.slice<1>(); + for ( int i = 0; i < num_data; ++i ) + { + EXPECT_EQ( slice_int_dst_host(i), inverse_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_dst_host(i,0), inverse_rank + host_steering(i) ); + EXPECT_EQ( slice_dbl_dst_host(i,1), inverse_rank + host_steering(i) + 0.5 ); + } +} + +//---------------------------------------------------------------------------// +void test4( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will communicate with all other ranks. Interleave the sends. + int num_data = 2 * my_size; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + std::vector neighbor_ranks( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + export_ranks_host[n] = n; + export_ranks_host[n + my_size] = n; + neighbor_ranks[n] = n; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + for ( int n = 0; n < my_size; ++n ) + { + export_ranks[n] = n; + export_ranks[n + my_size] = n; + neighbor_ranks[n] = n; + } + + // Create the plan + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data_src( num_data ); + auto slice_int_src = data_src.slice<0>(); + auto slice_dbl_src = data_src.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int_src(i) = my_rank; + slice_dbl_src(i,0) = my_rank; + slice_dbl_src(i,1) = my_rank + 0.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Create a second set of data to which we will migrate. + AoSoA_t data_dst( num_data ); + auto slice_int_dst = data_dst.slice<0>(); + auto slice_dbl_dst = data_dst.slice<1>(); + + // Do the migration + Cabana::migrate( *distributor, data_src, data_dst ); + + // Check the migration. 
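    // Expected receive ordering: the two self-sent elements come first, and
    // the remaining neighbors follow in ascending rank order except that
    // rank 0's data lands in the slot this rank vacated, which is why the
    // loop below expects rank 0's values at position i == my_rank.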
+ Cabana::AoSoA data_dst_host( num_data ); + auto slice_int_dst_host = data_dst_host.slice<0>(); + auto slice_dbl_dst_host = data_dst_host.slice<1>(); + Cabana::deep_copy( data_dst_host, data_dst ); + + // self sends + EXPECT_EQ( slice_int_dst_host(0), my_rank ); + EXPECT_EQ( slice_dbl_dst_host(0,0), my_rank ); + EXPECT_EQ( slice_dbl_dst_host(0,1), my_rank + 0.5 ); + + EXPECT_EQ( slice_int_dst_host(1), my_rank ); + EXPECT_EQ( slice_dbl_dst_host(1,0), my_rank ); + EXPECT_EQ( slice_dbl_dst_host(1,1), my_rank + 0.5 ); + + // others + for ( int i = 1; i < my_size; ++i ) + { + if ( i == my_rank ) + { + EXPECT_EQ( slice_int_dst_host(2*i), 0 ); + EXPECT_EQ( slice_dbl_dst_host(2*i,0), 0 ); + EXPECT_EQ( slice_dbl_dst_host(2*i,1), 0.5 ); + + EXPECT_EQ( slice_int_dst_host(2*i + 1), 0 ); + EXPECT_EQ( slice_dbl_dst_host(2*i + 1,0), 0 ); + EXPECT_EQ( slice_dbl_dst_host(2*i + 1,1), 0.5 ); + } + else + { + EXPECT_EQ( slice_int_dst_host(2*i), i ); + EXPECT_EQ( slice_dbl_dst_host(2*i,0), i ); + EXPECT_EQ( slice_dbl_dst_host(2*i,1), i + 0.5 ); + + EXPECT_EQ( slice_int_dst_host(2*i + 1), i ); + EXPECT_EQ( slice_dbl_dst_host(2*i + 1,0), i ); + EXPECT_EQ( slice_dbl_dst_host(2*i + 1,1), i + 0.5 ); + } + } +} + +//---------------------------------------------------------------------------// +void test5( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will communicate with all other ranks. Interleave the sends + // and only send every other value. + int num_data = 2 * my_size; + Kokkos::View export_ranks_host( + "export_ranks", num_data ); + std::vector neighbor_ranks( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + export_ranks_host[n] = -1; + export_ranks_host[n + my_size] = n; + neighbor_ranks[n] = n; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + + // Create the plan + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data_src( num_data ); + auto slice_int_src = data_src.slice<0>(); + auto slice_dbl_src = data_src.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int_src(i) = my_rank; + slice_dbl_src(i,0) = my_rank; + slice_dbl_src(i,1) = my_rank + 0.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Create a second set of data to which we will migrate. + AoSoA_t data_dst( my_size ); + auto slice_int_dst = data_dst.slice<0>(); + auto slice_dbl_dst = data_dst.slice<1>(); + + // Do the migration with slices + Cabana::migrate( *distributor, slice_int_src, slice_int_dst ); + Cabana::migrate( *distributor, slice_dbl_src, slice_dbl_dst ); + + // Check the migration. 
+ Cabana::AoSoA data_host( my_size ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data_dst ); + + // self sends + EXPECT_EQ( slice_int_host(0), my_rank ); + EXPECT_EQ( slice_dbl_host(0,0), my_rank ); + EXPECT_EQ( slice_dbl_host(0,1), my_rank + 0.5 ); + + // others + for ( int i = 1; i < my_size; ++i ) + { + if ( i == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 0 ); + EXPECT_EQ( slice_dbl_host(i,0), 0 ); + EXPECT_EQ( slice_dbl_host(i,1), 0.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), i ); + EXPECT_EQ( slice_dbl_host(i,0), i ); + EXPECT_EQ( slice_dbl_host(i,1), i + 0.5 ); + } + } +} + +//---------------------------------------------------------------------------// +void test6( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get the comm size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every has one element and will send that element to rank 0. + int num_data = 1; + Kokkos::View + export_ranks( "export_ranks", num_data ); + Kokkos::deep_copy( export_ranks, 0 ); + std::vector neighbor_ranks; + if ( 0 == my_rank ) + { + neighbor_ranks.resize( my_size ); + std::iota( neighbor_ranks.begin(), neighbor_ranks.end(), 0 ); + } + else + { + neighbor_ranks.assign( 1, 0 ); + } + + // Create the plan. + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data( num_data ); + auto slice_int = data.slice<0>(); + auto slice_dbl = data.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int(i) = my_rank; + slice_dbl(i,0) = my_rank; + slice_dbl(i,1) = my_rank + 0.5; + }; + Kokkos::RangePolicy range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Do the migration + Cabana::migrate( *distributor, data ); + + // Check the change in size. + if ( 0 == my_rank ) + EXPECT_EQ( data.size(), my_size ); + else + EXPECT_EQ( data.size(), 0 ); + + // Check the migration. + Cabana::AoSoA + data_host( distributor->totalNumImport() ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data ); + auto steering = distributor->getExportSteering(); + auto host_steering = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), steering ); + for ( int i = 0; i < distributor->totalNumImport(); ++i ) + { + EXPECT_EQ( slice_int_host(i), distributor->neighborRank(i) ); + EXPECT_EQ( slice_dbl_host(i,0), distributor->neighborRank(i) ); + EXPECT_EQ( slice_dbl_host(i,1), distributor->neighborRank(i) + 0.5 ); + } +} + +//---------------------------------------------------------------------------// +void test7( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > distributor; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get the comm size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Rank 0 starts with all the data and sends one element to every rank. + int num_data = (0 == my_rank) ? 
my_size : 0; + Kokkos::View + export_ranks( "export_ranks", num_data ); + auto fill_ranks = + KOKKOS_LAMBDA( const int i ) + { export_ranks(i) = i; }; + Kokkos::RangePolicy range_policy( 0, num_data ); + Kokkos::parallel_for( range_policy, fill_ranks ); + Kokkos::fence(); + std::vector neighbor_ranks; + if ( 0 == my_rank ) + { + neighbor_ranks.resize( my_size ); + std::iota( neighbor_ranks.begin(), neighbor_ranks.end(), 0 ); + } + else + { + neighbor_ranks.assign( 1, 0 ); + } + + // Create the plan. + if ( use_topology ) + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks, neighbor_ranks ); + else + distributor = std::make_shared >( + MPI_COMM_WORLD, export_ranks ); + + // Make some data to migrate. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data( num_data ); + auto slice_int = data.slice<0>(); + auto slice_dbl = data.slice<1>(); + + // Fill the data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int(i) = i; + slice_dbl(i,0) = i; + slice_dbl(i,1) = i + 0.5; + }; + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Do the migration + Cabana::migrate( *distributor, data ); + + // Check the change in size. + EXPECT_EQ( data.size(), 1 ); + + // Check the migration. + Cabana::AoSoA + data_host( distributor->totalNumImport() ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data ); + EXPECT_EQ( slice_int_host(0), my_rank ); + EXPECT_EQ( slice_dbl_host(0,0), my_rank ); + EXPECT_EQ( slice_dbl_host(0,1), my_rank + 0.5 ); +} + +//---------------------------------------------------------------------------// +// RUN TESTS +//---------------------------------------------------------------------------// +TEST_F( TEST_CATEGORY, distributor_test_1 ) +{ test1(true); } + +TEST_F( TEST_CATEGORY, distributor_test_2 ) +{ test2(true); } + +TEST_F( TEST_CATEGORY, distributor_test_3 ) +{ test3(true); } + +TEST_F( TEST_CATEGORY, distributor_test_4 ) +{ test4(true); } + +TEST_F( TEST_CATEGORY, distributor_test_5 ) +{ test5(true); } + +TEST_F( TEST_CATEGORY, distributor_test_6 ) +{ test6(true); } + +TEST_F( TEST_CATEGORY, distributor_test_7 ) +{ test7(true); } + +TEST_F( TEST_CATEGORY, distributor_test_1_no_topo ) +{ test1(false); } + +TEST_F( TEST_CATEGORY, distributor_test_2_no_topo ) +{ test2(false); } + +TEST_F( TEST_CATEGORY, distributor_test_3_no_topo ) +{ test3(false); } + +TEST_F( TEST_CATEGORY, distributor_test_4_no_topo ) +{ test4(false); } + +TEST_F( TEST_CATEGORY, distributor_test_5_no_topo ) +{ test5(false); } + +TEST_F( TEST_CATEGORY, distributor_test_6_no_topo ) +{ test6(false); } + +TEST_F( TEST_CATEGORY, distributor_test_7_no_topo ) +{ test7(false); } + +//---------------------------------------------------------------------------// + +} // end namespace Test From 457977064bbf0f221fb32a11fa2a3f1877bf1f2a Mon Sep 17 00:00:00 2001 From: Stuart Slattery Date: Fri, 21 Dec 2018 12:48:17 -0500 Subject: [PATCH 3/3] adding halo communication plan for halo exchange of ghost data Co-authored-by: dalg24 --- core/src/Cabana_Halo.hpp | 670 +++++++++++++++++++++ core/unit_test/Cuda/tstHalo_Cuda.cpp | 13 + core/unit_test/OpenMP/tstHalo_OpenMP.cpp | 13 + core/unit_test/Pthread/tstHalo_Pthread.cpp | 13 + core/unit_test/Serial/tstHalo_Serial.cpp | 13 + core/unit_test/mpi_unit_test_main.cpp | 27 + core/unit_test/tstHalo.hpp | 424 +++++++++++++ 7 files changed, 1173 insertions(+) create mode 100644 core/src/Cabana_Halo.hpp create mode 100644 
core/unit_test/Cuda/tstHalo_Cuda.cpp create mode 100644 core/unit_test/OpenMP/tstHalo_OpenMP.cpp create mode 100644 core/unit_test/Pthread/tstHalo_Pthread.cpp create mode 100644 core/unit_test/Serial/tstHalo_Serial.cpp create mode 100644 core/unit_test/mpi_unit_test_main.cpp create mode 100644 core/unit_test/tstHalo.hpp diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp new file mode 100644 index 000000000..9ee978074 --- /dev/null +++ b/core/src/Cabana_Halo.hpp @@ -0,0 +1,670 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef CABANA_HALO_HPP +#define CABANA_HALO_HPP + +#include +#include +#include + +#include + +#include + +#include +#include +#include + +namespace Cabana +{ +//---------------------------------------------------------------------------// +/*! + \class Halo + + \brief Halo communication plan for scattering and gathering of ghosted + data. + + \tparam MemorySpace Memory space in which the data for this class will be + allocated. + + The halo allows for scatter and gather operations between locally-owned and + ghosted data. All data in the Halo (e.g. export and import data) is from the + point of view of the forward *GATHER* operation such that, for example, the + number of exports is the number of exports in the gather and the number of + imports is the number of imports in the gather. The reverse *SCATTER* + operation sends the ghosted data back the the uniquely-owned decomposition + and resolves collisions with atomic addition. Based on input for the forward + communication plan (where local data will be sent) the local number of + ghosts is computed. Some nomenclature: + + Export - the local data we uniquely own that we will send to other ranks for + those ranks to be used as ghosts. Export is used in the context of the + forward communication plan (the gather). + + Import - the ghost data that we get from other ranks. The rank we get a + ghost from is the unique owner of that data. Import is used in the context + of the forward communication plan (the gather). +*/ +template +class Halo : public CommunicationPlan +{ + public: + + /*! + \brief Neighbor and export rank constructor. Use this when you already + know which ranks neighbor each other (i.e. every rank already knows who + they will be exporting to and receiving from) as it will be more + efficient. In this case you already know the topology of the + point-to-point communication but not how much data to send and receive + from the neighbors. + + \tparam IdViewType The container type for the export element ids. This + container type can be either a Kokkos View or a Cabana Slice. + + \tparam RankViewType The container type for the export element + ranks. This container type can be either a Kokkos View or a Cabana + Slice. + + \param comm The MPI communicator over which the halo is defined. + + \param num_local The number of locally-owned elements on this rank. + + \param element_export_ids The local ids of the elements that will be + exported to other ranks to be used as ghosts. Element ids may be + repeated in this list if they are sent to multiple destinations. 
Must be + the same length as element_export_ranks. The input is expected to be a + Kokkos view or Cabana slice in the same memory space as the + communication plan. + + \param element_export_ranks The ranks to which we will send each element + in element_export_ids. In this case each rank must be one of the + neighbor ranks. Must be the same length as element_export_ids. A rank is + allowed to send to itself. The input is expected to be a Kokkos view or + Cabana slice in the same memory space as the communication plan. + + \param neighbor_ranks List of ranks this rank will send to and receive + from. This list can include the calling rank. This is effectively a + description of the topology of the point-to-point communication plan. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \note Calling this function completely updates the state of this object + and invalidates the previous state. + */ + template + Halo( MPI_Comm comm, + const std::size_t num_local, + const IdViewType& element_export_ids, + const RankViewType& element_export_ranks, + const std::vector& neighbor_ranks, + const int mpi_tag = 1221 ) + : CommunicationPlan( comm ) + , _num_local( num_local ) + { + if ( element_export_ids.size() != element_export_ranks.size() ) + throw std::runtime_error("Export ids and ranks different sizes!"); + + this->createFromExportsAndTopology( + element_export_ranks, neighbor_ranks, mpi_tag ); + this->createExportSteering( element_export_ranks, element_export_ids ); + } + + /*! + \brief Export rank constructor. Use this when you don't know who you + will receiving from - only who you are sending to. This is less + efficient than if we already knew who our neighbors were because we have + to determine the topology of the point-to-point communication first. + + \tparam IdViewType The container type for the export element ids. This + container type can be either a Kokkos View or a Cabana Slice. + + \tparam RankViewType The container type for the export element + ranks. This container type can be either a Kokkos View or a Cabana + Slice. + + \param comm The MPI communicator over which the halo is defined. + + \param num_local The number of locally-owned elements on this rank. + + \param element_export_ids The local ids of the elements that will be + sent to other ranks to be used as ghosts. Element ids may be repeated in + this list if they are sent to multiple destinations. Must be the same + length as element_export_ranks. The input is expected to be a Kokkos + view or Cabana slice in the same memory space as the communication plan. + + \param element_export_ranks The ranks to which we will export each element + in element_export_ids. Must be the same length as + element_export_ids. The neighbor ranks will be determined from this + list. A rank is allowed to send to itself. The input is expected to be a + Kokkos view or Cabana slice in the same memory space as the + communication plan. + + \param mpi_tag The MPI tag to use for non-blocking communication in the + communication plan generation. + + \note Calling this function completely updates the state of this object + and invalidates the previous state. 
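As an illustration of the intended call sequence for this constructor (not part of the patch; the member types, counts, and the Kokkos::HostSpace tag are assumptions, and MPI and Kokkos are presumed already initialized by the caller):

    #include <Cabana_Core.hpp>
    #include <Cabana_Halo.hpp>
    #include <Kokkos_Core.hpp>
    #include <mpi.h>

    void halo_example()
    {
        // Assumed memory-space tag; the unit tests use TEST_MEMSPACE here.
        using MemSpace = Kokkos::HostSpace;

        int my_rank = -1;
        MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );

        // Two locally owned elements; ghost the second one on this same rank.
        std::size_t num_local = 2;
        Kokkos::View<std::size_t*, MemSpace> export_ids( "export_ids", 1 );
        Kokkos::View<int*, MemSpace> export_ranks( "export_ranks", 1 );
        Kokkos::deep_copy( export_ids, std::size_t(1) );
        Kokkos::deep_copy( export_ranks, my_rank );

        // Export-ranks-only constructor: the topology is discovered for us.
        Cabana::Halo<MemSpace> halo( MPI_COMM_WORLD, num_local,
                                     export_ids, export_ranks );

        // Owned elements first, then numGhost() slots appended for the ghosts.
        using Types = Cabana::MemberTypes<double[3]>;
        Cabana::AoSoA<Types, MemSpace> data( halo.numLocal() + halo.numGhost() );

        Cabana::gather( halo, data );    // push owned values out to the ghosts
        auto field = data.slice<0>();
        Cabana::scatter( halo, field );  // sum ghost contributions back to owners
    }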
+ */ + template + Halo( MPI_Comm comm, + const std::size_t num_local, + const IdViewType& element_export_ids, + const RankViewType& element_export_ranks, + const int mpi_tag = 1221 ) + : CommunicationPlan( comm ) + , _num_local( num_local ) + { + if ( element_export_ids.size() != element_export_ranks.size() ) + throw std::runtime_error("Export ids and ranks different sizes!"); + + this->createFromExportsOnly( element_export_ranks, mpi_tag ); + this->createExportSteering( element_export_ranks, element_export_ids ); + } + + /*! + \brief Get the number of elements locally owned by this rank. + + \return THe number of elements locally owned by this rank. + */ + std::size_t numLocal() const + { return _num_local; } + + /*! + \brief Get the number of ghost elements this rank. Use this to resize a + data structure for scatter/gather operations. For use with scatter + gather, a data structure should be of size numLocal() + numGhost(). + + \return The number of ghosted elements on this rank. + */ + std::size_t numGhost() const + { return this->totalNumImport(); } + + private: + + std::size_t _num_local; +}; + +//---------------------------------------------------------------------------// +// Static type checker. +template +struct is_halo : public std::false_type {}; + +template +struct is_halo > + : public std::true_type {}; + +template +struct is_halo > + : public std::true_type {}; + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. AoSoA version. This is a + uniquely-owned to multiply-owned communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \param halo The halo to use for the gather. + + \param aosoa The AoSoA on which to perform the gather. The AoSoA should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). + + \param mpi_tag The MPI tag to use for non-blocking communication in the + gather. Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void gather( const Halo_t& halo, + AoSoA_t& aosoa, + int mpi_tag = 1002, + typename std::enable_if<(is_halo::value && + is_aosoa::value), + int>::type * = 0 ) +{ + // Check that the AoSoA is the right size. + if ( aosoa.size() != halo.numLocal() + halo.numGhost() ) + throw std::runtime_error("AoSoA is the wrong size for scatter!"); + + // Allocate a send buffer. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_send_buffer"), + halo.totalNumExport() ); + + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); + + // Gather from the local data into a tuple-contiguous send buffer. 
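    // Unlike the distributor path there is no separate self-send shortcut
    // here: every export, including those destined for this rank, goes
    // through the send buffer, and steering(i) always refers to an element
    // in the locally owned range.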
+ auto gather_send_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + send_buffer( i ) = aosoa.getTuple( steering(i) ); + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, + gather_send_buffer_func ); + Kokkos::fence(); + + // Allocate a receive buffer. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_recv_buffer"), + halo.totalNumImport() ); + + // Post non-blocking receives. + int num_n = halo.numNeighbor(); + std::vector requests( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = + recv_range.first + halo.numImport(n); + + auto recv_subview = Kokkos::subview( recv_buffer, recv_range ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof(typename AoSoA_t::tuple_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm(), + &(requests[n]) ); + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + send_range.second = + send_range.first + halo.numExport(n); + + auto send_subview = Kokkos::subview( send_buffer, send_range ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof(typename AoSoA_t::tuple_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm() ); + + send_range.first = send_range.second; + } + + // Wait on non-blocking receives. + std::vector status( num_n ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Extract the receive buffer into the ghosted elements. + std::size_t num_local = halo.numLocal(); + auto extract_recv_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + std::size_t ghost_idx = i + num_local; + aosoa.setTuple( ghost_idx, recv_buffer(i) ); + }; + Kokkos::RangePolicy + extract_recv_buffer_policy( 0, halo.totalNumImport() ); + Kokkos::parallel_for( "Cabana::gather::extract_recv_buffer", + extract_recv_buffer_policy, + extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( halo.comm() ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. Slice version. This is a + uniquely-owned to multiply-owned communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam Slice_t Slice type - must be a Slice. + + \param halo The halo to use for the gather. + + \param slice The Slice on which to perform the gather. The Slice should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). + + \param mpi_tag The MPI tag to use for non-blocking communication in the + gather. 
Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void gather( const Halo_t& halo, + Slice_t& slice, + int mpi_tag = 1002, + typename std::enable_if<(is_halo::value && + is_slice::value), + int>::type * = 0 ) +{ + // Check that the Slice is the right size. + if ( slice.size() != halo.numLocal() + halo.numGhost() ) + throw std::runtime_error("Slice is the wrong size for scatter!"); + + // Get the number of components in the slice. + int num_comp = 1; + for ( int d = 2; d < slice.rank(); ++d ) + num_comp *= slice.extent(d); + + // Get the raw slice data. + auto slice_data = slice.data(); + + // Allocate a send buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_send_buffer"), + halo.totalNumExport(), num_comp ); + + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); + + // Gather from the local data into a tuple-contiguous send buffer. + auto gather_send_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( steering(i) ); + auto a = Slice_t::index_type::a( steering(i) ); + std::size_t slice_offset = s*slice.stride(0) + a; + for ( int n = 0; n < num_comp; ++n ) + send_buffer( i, n ) = + slice_data[ slice_offset + n * Slice_t::vector_length ]; + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, + gather_send_buffer_func ); + Kokkos::fence(); + + // Allocate a receive buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_recv_buffer"), + halo.totalNumImport(), num_comp ); + + // Post non-blocking receives. + int num_n = halo.numNeighbor(); + std::vector requests( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + halo.numImport(n); + + auto recv_subview = + Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm(), + &(requests[n]) ); + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + send_range.second = send_range.first + halo.numExport(n); + + auto send_subview = + Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm() ); + + send_range.first = send_range.second; + } + + // Wait on non-blocking receives. + std::vector status( num_n ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Extract the receive buffer into the ghosted elements. 
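    // Received element i is written into ghost slot num_local + i; the
    // per-component offsets mirror the packing logic above, stepping by
    // vector_length between components of the same element.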
+ std::size_t num_local = halo.numLocal(); + auto extract_recv_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + std::size_t ghost_idx = i + num_local; + auto s = Slice_t::index_type::s( ghost_idx ); + auto a = Slice_t::index_type::a( ghost_idx ); + std::size_t slice_offset = s*slice.stride(0) + a; + for ( int n = 0; n < num_comp; ++n ) + slice_data[ slice_offset + Slice_t::vector_length * n ] = + recv_buffer( i, n ); + }; + Kokkos::RangePolicy + extract_recv_buffer_policy( 0, halo.totalNumImport() ); + Kokkos::parallel_for( "Cabana::gather::extract_recv_buffer", + extract_recv_buffer_policy, + extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( halo.comm() ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously scatter data from the ghosts to the local decomposition + of a slice using the halo reverse communication plan. This is a + multiply-owned to uniquely owned communication. + + In a scatter operation results from ghosted values on other processors are + scattered back to the owning processor of the ghost and the value associated + with the ghost is summed into the locally owned value the ghost + represents. If a locally owned element is ghosted on multiple ranks, then + multiple contributions will be made to the sum, one for each rank. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam Slice_t Slice type - must be a Slice. + + \param halo The halo to use for the scatter. + + \param slice The Slice on which to perform the scatter. The Slice should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). + + \param mpi_tag The MPI tag to use for non-blocking communication in the + scatter. Note here that if multiple instances of this function are being + called at once then different tags should be used in each function call to + avoid any communication conflicts. +*/ +template +void scatter( const Halo_t& halo, + Slice_t& slice, + int mpi_tag = 1003, + typename std::enable_if<(is_halo::value && + is_slice::value), + int>::type * = 0 ) +{ + // Check that the Slice is the right size. + if ( slice.size() != halo.numLocal() + halo.numGhost() ) + throw std::runtime_error("Slice is the wrong size for scatter!"); + + // Get the number of components in the slice. + int num_comp = 1; + for ( int d = 2; d < slice.rank(); ++d ) + num_comp *= slice.extent(d); + + // Get the raw slice data. + auto slice_data = slice.data(); + + // Allocate a send buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_send_buffer"), + halo.totalNumImport(), num_comp ); + + // Extract the send buffer from the ghosted elements. 
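    // Note the role reversal relative to gather(): the scatter send buffer is
    // filled from the ghost slots (totalNumImport entries) and the receive
    // buffer is sized by totalNumExport, i.e. the communication plan runs in
    // reverse. The received contributions are later summed into the owned
    // elements with atomic adds so that multiple ghosts of the same element
    // combine correctly.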
+ std::size_t num_local = halo.numLocal(); + auto extract_send_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + std::size_t ghost_idx = i + num_local; + auto s = Slice_t::index_type::s( ghost_idx ); + auto a = Slice_t::index_type::a( ghost_idx ); + std::size_t slice_offset = s*slice.stride(0) + a; + for ( int n = 0; n < num_comp; ++n ) + send_buffer( i, n ) = + slice_data[ slice_offset + Slice_t::vector_length * n ]; + }; + Kokkos::RangePolicy + extract_send_buffer_policy( 0, halo.totalNumImport() ); + Kokkos::parallel_for( "Cabana::scatter::extract_send_buffer", + extract_send_buffer_policy, + extract_send_buffer_func ); + Kokkos::fence(); + + // Allocate a receive buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing("halo_recv_buffer"), + halo.totalNumExport(), num_comp ); + + // Post non-blocking receives. + int num_n = halo.numNeighbor(); + std::vector requests( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + halo.numExport(n); + + auto recv_subview = + Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm(), + &(requests[n]) ); + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + send_range.second = send_range.first + halo.numImport(n); + + auto send_subview = + Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof(typename Slice_t::value_type), + MPI_BYTE, + halo.neighborRank(n), + mpi_tag, + halo.comm() ); + + send_range.first = send_range.second; + } + + // Wait on non-blocking receives. + std::vector status( num_n ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + assert( MPI_SUCCESS == ec ); + + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); + + // Scatter the ghosts in the receive buffer into the local values. + auto scatter_recv_buffer_func = + KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( steering(i) ); + auto a = Slice_t::index_type::a( steering(i) ); + std::size_t slice_offset = s*slice.stride(0) + a; + for ( int n = 0; n < num_comp; ++n ) + Kokkos::atomic_add( + slice_data + slice_offset + Slice_t::vector_length * n, + recv_buffer(i,n) ); + }; + Kokkos::RangePolicy + scatter_recv_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::scatter::scatter_recv_buffer", + scatter_recv_buffer_policy, + scatter_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( halo.comm() ); +} + +//---------------------------------------------------------------------------// + +} // end namespace Cabana + +#endif // end CABANA_HALO_HPP diff --git a/core/unit_test/Cuda/tstHalo_Cuda.cpp b/core/unit_test/Cuda/tstHalo_Cuda.cpp new file mode 100644 index 000000000..d8756f956 --- /dev/null +++ b/core/unit_test/Cuda/tstHalo_Cuda.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. 
For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/OpenMP/tstHalo_OpenMP.cpp b/core/unit_test/OpenMP/tstHalo_OpenMP.cpp new file mode 100644 index 000000000..d2588dd12 --- /dev/null +++ b/core/unit_test/OpenMP/tstHalo_OpenMP.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Pthread/tstHalo_Pthread.cpp b/core/unit_test/Pthread/tstHalo_Pthread.cpp new file mode 100644 index 000000000..bd5db3ecb --- /dev/null +++ b/core/unit_test/Pthread/tstHalo_Pthread.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/Serial/tstHalo_Serial.cpp b/core/unit_test/Serial/tstHalo_Serial.cpp new file mode 100644 index 000000000..b0b1f1a45 --- /dev/null +++ b/core/unit_test/Serial/tstHalo_Serial.cpp @@ -0,0 +1,13 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include diff --git a/core/unit_test/mpi_unit_test_main.cpp b/core/unit_test/mpi_unit_test_main.cpp new file mode 100644 index 000000000..9f548cb21 --- /dev/null +++ b/core/unit_test/mpi_unit_test_main.cpp @@ -0,0 +1,27 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include + +#include + +#include + +int main( int argc, char* argv[] ) +{ + MPI_Init( &argc, &argv ); + Kokkos::initialize( argc, argv ); + ::testing::InitGoogleTest( &argc, argv ); + int return_val = RUN_ALL_TESTS(); + Kokkos::finalize(); + MPI_Finalize(); + return return_val; +} diff --git a/core/unit_test/tstHalo.hpp b/core/unit_test/tstHalo.hpp new file mode 100644 index 000000000..36b5c3972 --- /dev/null +++ b/core/unit_test/tstHalo.hpp @@ -0,0 +1,424 @@ +/**************************************************************************** + * Copyright (c) 2018 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// test without collisions +void test1( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > halo; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will send ghosts to all other ranks. Send one element to + // each rank including yourself. Interleave the sends. The resulting + // communication plan has ghosts that have one unique destination. + int num_local = 2 * my_size; + Kokkos::View export_ranks_host( + "export_ranks", my_size ); + Kokkos::View export_ids_host( + "export_ids", my_size ); + std::vector neighbors( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + neighbors[n] = n; + export_ranks_host(n) = n; + export_ids_host(n) = 2*n + 1; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + auto export_ids = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ids_host ); + + // Create the plan. + if ( use_topology ) + halo = std::make_shared >( + MPI_COMM_WORLD, num_local, export_ids, export_ranks, neighbors ); + else + halo = std::make_shared >( + MPI_COMM_WORLD, num_local, export_ids, export_ranks ); + + // Check the plan. + EXPECT_EQ( halo->numLocal(), num_local ); + EXPECT_EQ( halo->numGhost(), my_size ); + + // Create an AoSoA of local data with space allocated for local data. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data( halo->numLocal() + halo->numGhost() ); + auto slice_int = data.slice<0>(); + auto slice_dbl = data.slice<1>(); + + // Fill the local data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int(i) = my_rank + 1; + slice_dbl(i,0) = my_rank + 1; + slice_dbl(i,1) = my_rank + 1.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_local ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Gather by AoSoA. + Cabana::gather( *halo, data ); + + // Check the results of the gather. 
+ Cabana::AoSoA data_host( + halo->numLocal() + halo->numGhost() ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data ); + + // check that the local data didn't change. + for ( int i = 0; i < my_size; ++i ) + { + EXPECT_EQ( slice_int_host(2*i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,1), my_rank + 1.5 ); + + EXPECT_EQ( slice_int_host(2*i + 1), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i + 1,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i + 1,1), my_rank + 1.5 ); + } + + // Check that we got one element from everyone. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. + int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), my_rank + 1.5 ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 1 ); + EXPECT_EQ( slice_dbl_host(i,0), 1 ); + EXPECT_EQ( slice_dbl_host(i,1), 1.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), send_rank + 1.5 ); + } + } + + // Scatter back the results, + Cabana::scatter( *halo, slice_int ); + Cabana::scatter( *halo, slice_dbl ); + Cabana::deep_copy( data_host, data ); + + // Check that the local data was updated. Every ghost had a unique + // destination so the result should be doubled for those elements that + // were ghosted. + for ( int i = 0; i < my_size; ++i ) + { + EXPECT_EQ( slice_int_host(2*i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,1), my_rank + 1.5 ); + + EXPECT_EQ( slice_int_host(2*i + 1), 2 * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(2*i + 1,0), 2 * (my_rank + 1 ) ); + EXPECT_EQ( slice_dbl_host(2*i + 1,1), 2 * (my_rank + 1.5) ); + } + + // Check that the ghost data didn't change. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. + int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), my_rank + 1.5 ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 1 ); + EXPECT_EQ( slice_dbl_host(i,0), 1 ); + EXPECT_EQ( slice_dbl_host(i,1), 1.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), send_rank + 1.5 ); + } + } + + // Gather again, this time with slices. + Cabana::gather( *halo, slice_int ); + Cabana::gather( *halo, slice_dbl ); + Cabana::deep_copy( data_host, data ); + + // Check that the local data remained unchanged. + for ( int i = 0; i < my_size; ++i ) + { + EXPECT_EQ( slice_int_host(2*i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(2*i,1), my_rank + 1.5 ); + + EXPECT_EQ( slice_int_host(2*i + 1), 2 * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(2*i + 1,0), 2 * (my_rank + 1 ) ); + EXPECT_EQ( slice_dbl_host(2*i + 1,1), 2 * (my_rank + 1.5) ); + } + + // Check that the ghost data was updated. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. 
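The doubling expected by the scatter checks above comes down to one line of arithmetic: each exported element has exactly one ghost copy, and scatter atomically adds that unmodified copy back onto its owner. A scalar model of the round trip (the rank value is only an example):

    // Scalar model of test1's gather -> scatter round trip for one exported
    // element: one ghost copy exists, so the owned value doubles.
    #include <cassert>

    int main()
    {
        const int my_rank = 3;        // example rank (assumed for illustration)
        double owned = my_rank + 1.0; // odd-indexed local value set by fill_func
        const double ghost = owned;   // gather copies the owned value to its ghost
        owned += ghost;               // scatter adds the unmodified ghost back once
        assert( owned == 2.0 * ( my_rank + 1.0 ) );
        return 0;
    }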
+ int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), 2 * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,0), 2 * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,1), 2 * (my_rank + 1.5) ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 2 ); + EXPECT_EQ( slice_dbl_host(i,0), 2 ); + EXPECT_EQ( slice_dbl_host(i,1), 3 ); + } + else + { + EXPECT_EQ( slice_int_host(i), 2 * (send_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,0), 2 * (send_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,1), 2 * (send_rank + 1.5) ); + } + } +} + +//---------------------------------------------------------------------------// +// test with collisions +void test2( const bool use_topology ) +{ + // Make a communication plan. + std::shared_ptr > halo; + + // Get my rank. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + // Get my size. + int my_size = -1; + MPI_Comm_size( MPI_COMM_WORLD, &my_size ); + + // Every rank will send its single data point as ghosts to all other + // ranks. This will create collisions in the scatter as every rank will + // have data for this rank in the summation. + int num_local = 1; + Kokkos::View export_ranks_host( + "export_ranks", my_size ); + Kokkos::View + export_ids( "export_ids", my_size ); + Kokkos::deep_copy( export_ids, 0 ); + std::vector neighbors( my_size ); + for ( int n = 0; n < my_size; ++n ) + { + neighbors[n] = n; + export_ranks_host(n) = n; + } + auto export_ranks = Kokkos::create_mirror_view_and_copy( + TEST_MEMSPACE(), export_ranks_host ); + + // Create the plan. + if ( use_topology ) + halo = std::make_shared >( + MPI_COMM_WORLD, num_local, export_ids, export_ranks, neighbors ); + else + halo = std::make_shared >( + MPI_COMM_WORLD, num_local, export_ids, export_ranks ); + + // Check the plan. + EXPECT_EQ( halo->numLocal(), num_local ); + EXPECT_EQ( halo->numGhost(), my_size ); + + // Create an AoSoA of local data with space allocated for local data. + using DataTypes = Cabana::MemberTypes; + using AoSoA_t = Cabana::AoSoA; + AoSoA_t data( halo->numLocal() + halo->numGhost() ); + auto slice_int = data.slice<0>(); + auto slice_dbl = data.slice<1>(); + + // Fill the local data. + auto fill_func = + KOKKOS_LAMBDA( const int i ) + { + slice_int(i) = my_rank + 1; + slice_dbl(i,0) = my_rank + 1; + slice_dbl(i,1) = my_rank + 1.5; + }; + Kokkos::RangePolicy + range_policy( 0, num_local ); + Kokkos::parallel_for( range_policy, fill_func ); + Kokkos::fence(); + + // Gather by AoSoA. + Cabana::gather( *halo, data ); + + // Check the results of the gather. + Cabana::AoSoA data_host( + halo->numLocal() + halo->numGhost() ); + auto slice_int_host = data_host.slice<0>(); + auto slice_dbl_host = data_host.slice<1>(); + Cabana::deep_copy( data_host, data ); + + // check that the local data didn't change. + EXPECT_EQ( slice_int_host(0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(0,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(0,1), my_rank + 1.5 ); + + // Check that we got one element from everyone. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. 
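The scatter expectations checked further below follow from the collision setup above: the single owned element is ghosted on every rank, so my_size contributions are added back and the owned value grows by a factor of my_size + 1. A scalar model with placeholder rank and size values:

    // Scalar model of test2's scatter with collisions: my_size ghost copies of
    // the one owned element are atomically added back onto the owner.
    #include <cassert>

    int main()
    {
        const int my_rank = 2;        // example rank (assumed)
        const int my_size = 4;        // example communicator size (assumed)
        double owned = my_rank + 1.0; // the single owned value set by fill_func
        const double ghost = owned;   // each rank holds one ghost copy after gather
        for ( int r = 0; r < my_size; ++r )
            owned += ghost;           // one atomic_add contribution arrives per rank
        assert( owned == ( my_size + 1 ) * ( my_rank + 1.0 ) );
        return 0;
    }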
+ int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), my_rank + 1.5 ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 1 ); + EXPECT_EQ( slice_dbl_host(i,0), 1 ); + EXPECT_EQ( slice_dbl_host(i,1), 1.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), send_rank + 1.5 ); + } + } + + // Scatter back the results, + Cabana::scatter( *halo, slice_int ); + Cabana::scatter( *halo, slice_dbl ); + Cabana::deep_copy( data_host, data ); + + // Check that the local data was updated. Every ghost was sent to all of + // the ranks so the result should be multiplied by the number of ranks. + EXPECT_EQ( slice_int_host(0), (my_size + 1) * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(0,0), (my_size + 1) * (my_rank + 1 ) ); + EXPECT_EQ( slice_dbl_host(0,1), (my_size + 1) * (my_rank + 1.5) ); + + // Check that the ghost data didn't change. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. + int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), my_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), my_rank + 1.5 ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), 1 ); + EXPECT_EQ( slice_dbl_host(i,0), 1 ); + EXPECT_EQ( slice_dbl_host(i,1), 1.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,0), send_rank + 1 ); + EXPECT_EQ( slice_dbl_host(i,1), send_rank + 1.5 ); + } + } + + // Gather again, this time with slices. + Cabana::gather( *halo, slice_int ); + Cabana::gather( *halo, slice_dbl ); + Cabana::deep_copy( data_host, data ); + + // Check that the local data remained unchanged. + EXPECT_EQ( slice_int_host(0), (my_size + 1) * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(0,0), (my_size + 1) * (my_rank + 1 ) ); + EXPECT_EQ( slice_dbl_host(0,1), (my_size + 1) * (my_rank + 1.5) ); + + // Check that the ghost data was updated. + for ( int i = num_local; i < num_local + my_size; ++i ) + { + // Self sends are first. + int send_rank = i - num_local; + if ( send_rank == 0 ) + { + EXPECT_EQ( slice_int_host(i), (my_size + 1) * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,0), (my_size + 1) * (my_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,1), (my_size + 1) * (my_rank + 1.5) ); + } + else if ( send_rank == my_rank ) + { + EXPECT_EQ( slice_int_host(i), (my_size + 1) ); + EXPECT_EQ( slice_dbl_host(i,0), (my_size + 1) ); + EXPECT_EQ( slice_dbl_host(i,1), (my_size + 1) * 1.5 ); + } + else + { + EXPECT_EQ( slice_int_host(i), (my_size + 1) * (send_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,0), (my_size + 1) * (send_rank + 1) ); + EXPECT_EQ( slice_dbl_host(i,1), (my_size + 1) * (send_rank + 1.5) ); + } + } +} + +//---------------------------------------------------------------------------// +// RUN TESTS +//---------------------------------------------------------------------------// +TEST_F( TEST_CATEGORY, halo_test_1 ) +{ test1(true); } + +TEST_F( TEST_CATEGORY, halo_test_1_no_topo ) +{ test1(false); } + +TEST_F( TEST_CATEGORY, halo_test_2 ) +{ test2(true); } + +TEST_F( TEST_CATEGORY, halo_test_2_no_topo ) +{ test2(false); } + +//---------------------------------------------------------------------------// + +} // end namespace Test
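For context on the TEST_F registrations above: TEST_CATEGORY must name a gtest fixture, and TEST_MEMSPACE / TEST_EXECSPACE the backend's Kokkos spaces. The per-backend category headers are not part of this excerpt, so the following is only a hypothetical sketch of what one (a serial build, say) could define; every name in it is an assumption:

    // Hypothetical per-backend category header (all names assumed) that would
    // let the TEST_F( TEST_CATEGORY, ... ) registrations above compile.
    #include <Kokkos_Core.hpp>
    #include <gtest/gtest.h>

    class serial : public ::testing::Test {}; // gtest fixture named per backend

    #define TEST_CATEGORY serial
    #define TEST_EXECSPACE Kokkos::Serial
    #define TEST_MEMSPACE Kokkos::HostSpace

With such a header in place, the shared tests are driven by mpi_unit_test_main.cpp, so they are intended to be launched under an MPI runner (e.g. mpirun with several ranks); the exact executable names depend on the build system targets and are not shown in this patch.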