diff --git a/docs/source/backends/adios2.rst b/docs/source/backends/adios2.rst index de6b47aaad..13e357022c 100644 --- a/docs/source/backends/adios2.rst +++ b/docs/source/backends/adios2.rst @@ -105,6 +105,11 @@ The default behavior may be restored by setting the :ref:`JSON parameter ` ``adios2.attribute_writing_ranks`` can be used to restrict attribute writing to only a select handful of ranks (most typically a single one). +The ADIOS2 backend of the openPMD-api will then ignore attributes from all other MPI ranks. + +.. tip:: + + Treat metadata specification as a collective operation in order to retain compatibility with HDF5, and then specify ``adios2.attribute_writing_ranks = 0`` in order to achieve best performance in ADIOS2. + +.. warning:: + + The ADIOS2 backend may also use attributes to encode openPMD groups (ref. "group table"). + The ``adios2.attribute_writing_ranks`` key also applies to those attributes, i.e. group creation must then also be treated as collective (at least on the specified ranks). 
+ Experimental group table feature -------------------------------- diff --git a/docs/source/details/adios2.json b/docs/source/details/adios2.json index d71061c0bc..c817eb0d7a 100644 --- a/docs/source/details/adios2.json +++ b/docs/source/details/adios2.json @@ -2,6 +2,7 @@ "adios2": { "engine": { "type": "sst", + "preferred_flush_target": "disk", "parameters": { "BufferGrowthFactor": "2.0", "QueueLimit": "2" @@ -17,6 +18,7 @@ } } ] - } + }, + "attribute_writing_ranks": 0 } } diff --git a/docs/source/details/adios2.toml b/docs/source/details/adios2.toml index 863a5021fa..20ef9e827f 100644 --- a/docs/source/details/adios2.toml +++ b/docs/source/details/adios2.toml @@ -1,5 +1,12 @@ +[adios2] +# ignore all attribute writes not issued on these ranks +# can also be a list if multiple ranks need to be given +# however rank 0 should be the most common option here +attribute_writing_ranks = 0 + [adios2.engine] type = "sst" +preferred_flush_target = "disk" [adios2.engine.parameters] BufferGrowthFactor = "2.0" diff --git a/docs/source/details/backendconfig.rst b/docs/source/details/backendconfig.rst index f1c47c78bd..57d577af10 100644 --- a/docs/source/details/backendconfig.rst +++ b/docs/source/details/backendconfig.rst @@ -113,7 +113,7 @@ A full configuration of the ADIOS2 backend: .. literalinclude:: adios2.toml :language: toml -All keys found under ``adios2.dataset`` are applicable globally as well as per dataset, keys found under ``adios2.engine`` only globally. +All keys found under ``adios2.dataset`` are applicable globally as well as per dataset, any other keys such as those found under ``adios2.engine`` only globally. Explanation of the single keys: * ``adios2.engine.type``: A string that is passed directly to ``adios2::IO:::SetEngine`` for choosing the ADIOS2 engine to be used. @@ -142,6 +142,11 @@ Explanation of the single keys: The openPMD-api will automatically use a fallback implementation for the span-based Put() API if any operator is added to a dataset. 
This workaround is enabled on a per-dataset level. The workaround can be completely deactivated by specifying ``{"adios2": {"use_span_based_put": true}}`` or it can alternatively be activated indiscriminately for all datasets by specifying ``{"adios2": {"use_span_based_put": false}}``. +* ``adios2.attribute_writing_ranks``: A list of MPI ranks that define metadata. ADIOS2 attributes will be written only from those ranks, any other ranks will be ignored. Can be either a list of integers or a single integer. + +.. hint:: + + Specifying ``adios2.attribute_writing_ranks`` can lead to serious serialization performance improvements at large scale. Operations specified inside ``adios2.dataset.operators`` will be applied to ADIOS2 datasets in writing as well as in reading. Beginning with ADIOS2 2.8.0, this can be used to specify decompressor settings: diff --git a/docs/source/details/mpi.rst b/docs/source/details/mpi.rst index b94e886fac..ea4ec0551e 100644 --- a/docs/source/details/mpi.rst +++ b/docs/source/details/mpi.rst @@ -42,6 +42,8 @@ Functionality Behavior Description If you want to support all backends equally, treat as a collective operation. Note that openPMD represents constant record components with attributes, thus inheriting this for ``::makeConstant``. + When treating attribute definitions as collective, we advise specifying the ADIOS2 :ref:`JSON/TOML key ` ``adios2.attribute_writing_ranks`` for metadata aggregation scalability, typically as ``adios2.attribute_writing_ranks = 0``. + .. [4] We usually open iterations delayed on first access. This first access is usually the ``flush()`` call after a ``storeChunk``/``loadChunk`` operation. If the first access is non-collective, an explicit, collective ``Iteration::open()`` can be used to have the files already open. Alternatively, iterations might be accessed for the first time by immediate operations such as ``::availableChunks()``. 
diff --git a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp index e63c6a493b..cdd7983312 100644 --- a/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp +++ b/include/openPMD/IO/ADIOS/ADIOS2IOHandler.hpp @@ -274,6 +274,8 @@ class ADIOS2IOHandlerImpl return m_useGroupTable.value(); } + bool m_writeAttributesFromThisRank = true; + struct ParameterizedOperator { adios2::Operator op; @@ -285,7 +287,9 @@ class ADIOS2IOHandlerImpl json::TracingJSON m_config; static json::TracingJSON nullvalue; - void init(json::TracingJSON config); + template + void + init(json::TracingJSON config, Callback &&callbackWriteAttributesFromRank); template json::TracingJSON config(Key &&key, json::TracingJSON &cfg) diff --git a/src/IO/ADIOS/ADIOS2IOHandler.cpp b/src/IO/ADIOS/ADIOS2IOHandler.cpp index 8cddd578a0..534e364f53 100644 --- a/src/IO/ADIOS/ADIOS2IOHandler.cpp +++ b/src/IO/ADIOS/ADIOS2IOHandler.cpp @@ -79,7 +79,43 @@ ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_engineType(std::move(engineType)) , m_userSpecifiedExtension{std::move(specifiedExtension)} { - init(std::move(cfg)); + init( + std::move(cfg), + /* callbackWriteAttributesFromRank = */ + [communicator, this](nlohmann::json const &attribute_writing_ranks) { + int rank = 0; + MPI_Comm_rank(communicator, &rank); + auto throw_error = []() { + throw error::BackendConfigSchema( + {"adios2", "attribute_writing_ranks"}, + "Type must be either an integer or an array of integers."); + }; + if (attribute_writing_ranks.is_array()) + { + m_writeAttributesFromThisRank = false; + for (auto const &val : attribute_writing_ranks) + { + if (!val.is_number()) + { + throw_error(); + } + if (val.get() == rank) + { + m_writeAttributesFromThisRank = true; + break; + } + } + } + else if (attribute_writing_ranks.is_number()) + { + m_writeAttributesFromThisRank = + attribute_writing_ranks.get() == rank; + } + else + { + throw_error(); + } + }); } #endif // openPMD_HAVE_MPI @@ -94,7 +130,7 @@ 
ADIOS2IOHandlerImpl::ADIOS2IOHandlerImpl( , m_engineType(std::move(engineType)) , m_userSpecifiedExtension(std::move(specifiedExtension)) { - init(std::move(cfg)); + init(std::move(cfg), [](auto const &...) {}); } ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() @@ -135,7 +171,9 @@ ADIOS2IOHandlerImpl::~ADIOS2IOHandlerImpl() } } -void ADIOS2IOHandlerImpl::init(json::TracingJSON cfg) +template +void ADIOS2IOHandlerImpl::init( + json::TracingJSON cfg, Callback &&callbackWriteAttributesFromRank) { // allow overriding through environment variable m_engineType = @@ -181,6 +219,12 @@ void ADIOS2IOHandlerImpl::init(json::TracingJSON cfg) : ModifiableAttributes::No; } + if (m_config.json().contains("attribute_writing_ranks")) + { + callbackWriteAttributesFromRank( + m_config["attribute_writing_ranks"].json()); + } + auto engineConfig = config(ADIOS2Defaults::str_engine); if (!engineConfig.json().is_null()) { @@ -915,6 +959,10 @@ void ADIOS2IOHandlerImpl::writeDataset( void ADIOS2IOHandlerImpl::writeAttribute( Writable *writable, const Parameter ¶meters) { + if (!m_writeAttributesFromThisRank) + { + return; + } #if openPMD_HAS_ADIOS_2_9 switch (useGroupTable()) { @@ -3033,7 +3081,11 @@ namespace detail if (!initializedDefaults) { // Currently only schema 0 supported - m_IO.DefineAttribute(ADIOS2Defaults::str_adios2Schema, 0); + if (m_impl->m_writeAttributesFromThisRank) + { + m_IO.DefineAttribute( + ADIOS2Defaults::str_adios2Schema, 0); + } initializedDefaults = true; } @@ -3168,7 +3220,8 @@ namespace detail { if (writeOnly(m_mode) && !m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute)) + ADIOS2Defaults::str_usesstepsAttribute) && + m_impl->m_writeAttributesFromThisRank) { m_IO.DefineAttribute( ADIOS2Defaults::str_usesstepsAttribute, 0); @@ -3189,7 +3242,8 @@ namespace detail */ if (calledExplicitly && writeOnly(m_mode) && !m_IO.InquireAttribute( - ADIOS2Defaults::str_usesstepsAttribute)) + ADIOS2Defaults::str_usesstepsAttribute) && + 
m_impl->m_writeAttributesFromThisRank) { m_IO.DefineAttribute( ADIOS2Defaults::str_usesstepsAttribute, 1); @@ -3356,7 +3410,7 @@ namespace detail case UseGroupTable::Yes: #if openPMD_HAS_ADIOS_2_9 { - if (writeOnly(m_mode)) + if (writeOnly(m_mode) && m_impl->m_writeAttributesFromThisRank) { requireActiveStep(); auto currentStepBuffered = currentStep(); diff --git a/test/ParallelIOTest.cpp b/test/ParallelIOTest.cpp index eb9765375a..ce7d6cc565 100644 --- a/test/ParallelIOTest.cpp +++ b/test/ParallelIOTest.cpp @@ -1,6 +1,7 @@ /* Running this test in parallel with MPI requires MPI::Init. * To guarantee a correct call to Init, launch the tests manually. */ +#include "openPMD/IO/ADIOS/macros.hpp" #include "openPMD/auxiliary/Environment.hpp" #include "openPMD/auxiliary/Filesystem.hpp" #include "openPMD/openPMD.hpp" @@ -1170,10 +1171,16 @@ clevel = "1" doshuffle = "BLOSC_BITSHUFFLE" )END"; - std::string writeConfigBP4 = R"END( + std::string writeConfigBP4 = + R"END( [adios2] unused = "parameter" - +attribute_writing_ranks = 0 +)END" +#if openPMD_HAS_ADIOS_2_9 + "use_group_table = true" +#endif + R"END( [adios2.engine] type = "bp4" unused = "as well"