diff --git a/bindings/C/adios2/c/adios2_c_io.h b/bindings/C/adios2/c/adios2_c_io.h index 09fa10e256..c790cd0647 100644 --- a/bindings/C/adios2/c/adios2_c_io.h +++ b/bindings/C/adios2/c/adios2_c_io.h @@ -329,8 +329,8 @@ adios2_error adios2_remove_all_attributes(adios2_io *io); * MPI Collective function as it calls MPI_Comm_dup * @param io engine owner * @param name unique engine identifier - * @param mode adios2_mode_write, adios2_mode_read, adios2_mode_append, and - * adios2_mode_readRandomAccess + * @param mode adios2_mode_write, adios2_mode_read, adios2_mode_append + * and adios2_mode_readRandomAccess * @return success: handler, failure: NULL */ adios2_engine *adios2_open(adios2_io *io, const char *name, const adios2_mode mode); @@ -341,7 +341,7 @@ adios2_engine *adios2_open(adios2_io *io, const char *name, const adios2_mode mo * MPI Collective function as it calls MPI_Comm_dup * @param io engine owner * @param name unique engine identifier - * @param mode adios2_mode_write, adios2_mode_read, adios2_mode_append, and + * @param mode adios2_mode_write, adios2_mode_read, adios2_mode_append and * adios2_mode_readRandomAccess * @param comm communicator other than adios' handler comm. MPI only. 
* @return success: handler, failure: NULL diff --git a/bindings/CXX11/adios2/cxx11/Variable.h b/bindings/CXX11/adios2/cxx11/Variable.h index b2263a5780..ded48baded 100644 --- a/bindings/CXX11/adios2/cxx11/Variable.h +++ b/bindings/CXX11/adios2/cxx11/Variable.h @@ -214,7 +214,7 @@ class Variable * variable.Count() = {Ny,Nx}, then memoryCount = {Ny+2,Nx+2} * */ - void SetMemorySelection(const adios2::Box &memorySelection); + void SetMemorySelection(const adios2::Box &memorySelection = {{}, {}}); /** * Sets a step selection modifying current startStep, countStep diff --git a/docs/user_guide/source/components/anatomy.rst b/docs/user_guide/source/components/anatomy.rst index 0832b6dc3c..27f325b646 100644 --- a/docs/user_guide/source/components/anatomy.rst +++ b/docs/user_guide/source/components/anatomy.rst @@ -114,7 +114,10 @@ named `adios2::Mode::ReadRandomAccess`. `adios2::Mode::Read` mode allows data ac current timestep. `ReadRandomAccess` can only be used with file engines and involves loading all the file metadata at once. So it can be more memory intensive than `adios2::Mode::Read` mode, but allows reading data from any timestep using `SetStepSelection()`. If you use `adios2::Mode::ReadRandomAccess` mode, be sure to allocate enough memory to hold -multiple steps of the variable content. +multiple steps of the variable content. Note that ADIOS streaming +engines (like SST, DataMan, etc.) do not support `ReadRandomAccess` +mode. Also newer file Engines like BP5 do not allow +`BeginStep/EndStep` calls in `ReadRandomAccess` mode. .. code:: C++ @@ -134,3 +137,35 @@ multiple steps of the variable content. | |--> IO goes out of scope | |--> ADIOS goes out of scope or adios2_finalize() + +Previously we explored how to read using the input mode `adios2::Mode::Read`. Nonetheless, ADIOS has another input mode +named `adios2::Mode::ReadRandomAccess`. 
`adios2::Mode::Read` mode allows data access only timestep by timestep using +`BeginStep/EndStep`, but generally it is more memory efficient as ADIOS is only required to load metadata for the +current timestep. `ReadRandomAccess` can only be used with file engines and involves loading all the file metadata at +once. So it can be more memory intensive than `adios2::Mode::Read` mode, but allows reading data from any timestep using +`SetStepSelection()`. If you use `adios2::Mode::ReadRandomAccess` mode, be sure to allocate enough memory to hold +multiple steps of the variable content. Note that ADIOS streaming +engines (like SST, DataMan, etc.) do not support `ReadRandomAccess` +mode. Also newer file Engines like BP5 do not allow +`BeginStep/EndStep` calls in `ReadRandomAccess` mode. + +.. code:: C++ + + ADIOS adios("config.xml", MPI_COMM_WORLD); + | + | IO io = adios.DeclareIO(...); + | | + | | Engine e = io.Open("InputFileName.bp", adios2::Mode::ReadRandomAccess); + | | | + | | | Variable var = io.InquireVariable(...) + | | | | var.SetStepSelection() + | | | | e.Get(var, datapointer); + | | | | + | | | + | | e.Close(); + | | + | |--> IO goes out of scope + | + |--> ADIOS goes out of scope or adios2_finalize() + + diff --git a/docs/user_guide/source/components/io.rst b/docs/user_guide/source/components/io.rst index 326b93ad34..fefebe7f5f 100644 --- a/docs/user_guide/source/components/io.rst +++ b/docs/user_guide/source/components/io.rst @@ -196,7 +196,7 @@ A particular ``Engine`` type is set to the current ``IO`` component with the ``I Engine polymorphism is handled internally by the ``IO`` class, which allows subclassing future derived ``Engine`` types without changing the basic API. ``Engine`` objects are created in various modes. -The available modes are ``adios2::Mode::Read``, ``adios2::Mode::Write``, ``adios2::Mode::Append``, ``adios2::Mode::Sync``, ``adios2::Mode::Deferred``, and ``adios2::Mode::Undefined``. 
+The available modes are ``adios2::Mode::Read``, ``adios2::Mode::ReadRandomAccess``, ``adios2::Mode::Write``, ``adios2::Mode::Append``, ``adios2::Mode::Sync``, ``adios2::Mode::Deferred``, and ``adios2::Mode::Undefined``. .. code-block:: c++ diff --git a/docs/user_guide/source/engines/bp5.rst b/docs/user_guide/source/engines/bp5.rst index b29ec71eb5..76696d427a 100644 --- a/docs/user_guide/source/engines/bp5.rst +++ b/docs/user_guide/source/engines/bp5.rst @@ -130,6 +130,14 @@ This engine allows the user to fine tune the buffering operations through the fo #. **Threads**: Read side: Specify how many threads one process can use to speed up reading. The default value is *0*, to let the engine estimate the number of threads based on how many processes are running on the compute node and how many hardware threads are available on the compute node but it will use maximum 16 threads. Value *1* forces the engine to read everything within the main thread of the process. Other values specify the exact number of threads the engine can use. Although multithreaded reading works in a single *Get(adios2::Mode::Sync)* call if the read selection spans multiple data blocks in the file, the best parallelization is achieved by using deferred mode and reading everything in *PerformGets()/EndStep()*. + #. **FlattenSteps**: This is a writer-side parameter that specifies that the + reader should interpret multiple writer-created timesteps as a + single timestep, essentially flattening all Put()s into a single step. + + #. **IgnoreFlattenSteps**: This is a reader-side parameter that + tells the reader to ignore any FlattenSteps parameter supplied + to the writer. 
+ ============================== ===================== =========================================================== **Key** **Value Format** **Default** and Examples ============================== ===================== =========================================================== @@ -156,6 +164,8 @@ This engine allows the user to fine tune the buffering operations through the fo StatsLevel integer, 0 or 1 **1**, 0 MaxOpenFilesAtOnce integer >= 0 **UINT_MAX**, 1024, 1 Threads integer >= 0 **0**, 1, 32 + FlattenSteps boolean **off**, on, true, false + IgnoreFlattenSteps boolean **off**, on, true, false ============================== ===================== =========================================================== diff --git a/docs/user_guide/source/setting_up/source/cmake.rst b/docs/user_guide/source/setting_up/source/cmake.rst index d268fdd764..ce5b44dd2a 100644 --- a/docs/user_guide/source/setting_up/source/cmake.rst +++ b/docs/user_guide/source/setting_up/source/cmake.rst @@ -8,7 +8,7 @@ To build ADIOS v2.x, clone the repository and invoke the canonical CMake build s $ git clone https://github.com/ornladios/ADIOS2.git ADIOS2 $ mkdir adios2-build && cd adios2-build - $ cmake ../ADIOS2 cmake -DADIOS2_BUILD_EXAMPLES=ON + $ cmake ../ADIOS2 -DADIOS2_BUILD_EXAMPLES=ON -- The C compiler identification is GNU 9.4.0 -- The CXX compiler identification is GNU 9.4.0 ... @@ -111,10 +111,11 @@ Optionally, run the tests (need to configure with ``-DBUILD_TESTING=ON`` cmake f Total Test time (real) = 95.95 sec -And finally, use the standard invocation to install: +And finally, use the standard invocation to install (setting the install path beforehand): .. 
code-block:: bash + $ cmake ../ADIOS2 -DCMAKE_INSTALL_PREFIX=/path/to/where/adios/will/be/installed $ make install diff --git a/docs/user_guide/source/tutorials/helloWorld.rst b/docs/user_guide/source/tutorials/helloWorld.rst index a0e424a097..cdc709eea0 100644 --- a/docs/user_guide/source/tutorials/helloWorld.rst +++ b/docs/user_guide/source/tutorials/helloWorld.rst @@ -86,6 +86,7 @@ Start editing the skeleton file `ADIOS2/examples/hello/helloWorld/hello-world_tu adios2::Engine reader = io.Open("hello-world-cpp.bp", adios2::Mode::Read); std::string greeting; + reader.BeginStep(); reader.Get(varGreeting, greeting); reader.EndStep(); reader.Close(); @@ -93,7 +94,7 @@ Start editing the skeleton file `ADIOS2/examples/hello/helloWorld/hello-world_tu .. note:: - The ``BeginStep`` and ``EndStep`` calls are required when **reading** one step and multiple steps. We will see in + In Mode::Read, the ``BeginStep`` and ``EndStep`` calls are required when **reading** one step and multiple steps. We will see in another tutorial how to read multiple steps. It's important to note that the ``BeginStep`` should be called **before** all ``Inquire*`` / ``Available*`` function calls. 
diff --git a/source/adios2/common/ADIOSMacros.h b/source/adios2/common/ADIOSMacros.h index 8c5f719491..aac757ad6d 100644 --- a/source/adios2/common/ADIOSMacros.h +++ b/source/adios2/common/ADIOSMacros.h @@ -33,6 +33,7 @@ */ #define ADIOS2_FOREACH_ATTRIBUTE_PRIMITIVE_STDTYPE_1ARG(MACRO) \ + MACRO(char) \ MACRO(int8_t) \ MACRO(int16_t) \ MACRO(int32_t) \ @@ -43,8 +44,7 @@ MACRO(uint64_t) \ MACRO(float) \ MACRO(double) \ - MACRO(long double) \ - MACRO(char) + MACRO(long double) #define ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(MACRO) \ ADIOS2_FOREACH_ATTRIBUTE_PRIMITIVE_STDTYPE_1ARG(MACRO) \ @@ -52,15 +52,16 @@ MACRO(std::complex) #define ADIOS2_FOREACH_ATTRIBUTE_STDTYPE_1ARG(MACRO) \ - ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(MACRO) \ - MACRO(std::string) + MACRO(std::string) \ + ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(MACRO) #define ADIOS2_FOREACH_STDTYPE_1ARG(MACRO) \ - ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(MACRO) \ - MACRO(std::string) + MACRO(std::string) \ + ADIOS2_FOREACH_PRIMITIVE_STDTYPE_1ARG(MACRO) #define ADIOS2_FOREACH_TYPE_1ARG(MACRO) \ MACRO(std::string) \ + MACRO(char) \ MACRO(signed char) \ MACRO(unsigned char) \ MACRO(short) \ @@ -75,10 +76,10 @@ MACRO(double) \ MACRO(long double) \ MACRO(std::complex) \ - MACRO(std::complex) \ - MACRO(char) + MACRO(std::complex) #define ADIOS2_FOREACH_PRIMITIVE_TYPE_1ARG(MACRO) \ + MACRO(char) \ MACRO(signed char) \ MACRO(unsigned char) \ MACRO(short) \ @@ -93,8 +94,7 @@ MACRO(double) \ MACRO(long double) \ MACRO(std::complex) \ - MACRO(std::complex) \ - MACRO(char) + MACRO(std::complex) #define ADIOS2_FOREACH_COMPLEX_PRIMITIVE_TYPE_1ARG(MACRO) \ MACRO(float) \ @@ -102,9 +102,9 @@ MACRO(long double) #define ADIOS2_FOREACH_CHAR_TYPE_1ARG(MACRO) \ + MACRO(char) \ MACRO(signed char) \ - MACRO(unsigned char) \ - MACRO(char) + MACRO(unsigned char) #define ADIOS2_FOREACH_NUMERIC_TYPE_1ARG(MACRO) \ MACRO(short) \ @@ -123,6 +123,7 @@ #define ADIOS2_FOREACH_ATTRIBUTE_TYPE_1ARG(MACRO) \ MACRO(std::string) \ + MACRO(char) \ MACRO(signed 
char) \ MACRO(unsigned char) \ MACRO(short) \ @@ -137,10 +138,10 @@ MACRO(double) \ MACRO(long double) \ MACRO(std::complex) \ - MACRO(std::complex) \ - MACRO(char) + MACRO(std::complex) #define ADIOS2_FOREACH_ATTRIBUTE_PRIMITIVE_TYPE_1ARG(MACRO) \ + MACRO(char) \ MACRO(signed char) \ MACRO(unsigned char) \ MACRO(short) \ @@ -155,8 +156,7 @@ MACRO(double) \ MACRO(long double) \ MACRO(std::complex) \ - MACRO(std::complex) \ - MACRO(char) + MACRO(std::complex) /**
@@ -185,6 +185,7 @@
 #define ADIOS2_FOREACH_ATTRIBUTE_STDTYPE_2ARGS(MACRO)                                              \
     MACRO(std::string, string)                                                                     \
     MACRO(int8_t, int8)                                                                            \
+    MACRO(char, char)                                                                              \
     MACRO(uint8_t, uint8)                                                                          \
     MACRO(int16_t, int16)                                                                          \
     MACRO(uint16_t, uint16)                                                                        \
@@ -196,11 +197,11 @@
     MACRO(double, double)                                                                          \
     MACRO(long double, ldouble)                                                                    \
     MACRO(std::complex, cfloat)                                                             \
-    MACRO(std::complex, cdouble)                                                           \
-    MACRO(char, char)
+    MACRO(std::complex, cdouble)
 
 #define ADIOS2_FOREACH_PRIMITVE_STDTYPE_2ARGS(MACRO)                                               \
     MACRO(int8_t, int8)                                                                            \
+    MACRO(char, char)                                                                              \
     MACRO(uint8_t, uint8)                                                                          \
     MACRO(int16_t, int16)                                                                          \
     MACRO(uint16_t, uint16)                                                                        \
@@ -212,8 +213,7 @@
     MACRO(double, double)                                                                          \
     MACRO(long double, ldouble)                                                                    \
     MACRO(std::complex, cfloat)                                                             \
-    MACRO(std::complex, cdouble)                                                           \
-    MACRO(char, char)
+    MACRO(std::complex, cdouble)
 
 #define ADIOS2_FOREACH_MINMAX_STDTYPE_2ARGS(MACRO)                                                 \
     MACRO(int8_t, int8)                                                                            \
diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp
index 2eb3442c0c..0bb9311471 100644
--- a/source/adios2/core/VariableBase.cpp
+++ b/source/adios2/core/VariableBase.cpp
@@ -221,6 +221,13 @@ void VariableBase::SetMemorySelection(const Box &memorySelection)
     const Dims &memoryStart = memorySelection.first;
     const Dims &memoryCount = memorySelection.second;
 
+    if (memoryStart.empty() && memoryCount.empty())
+    {
+        m_MemoryStart.clear();
+        m_MemoryCount.clear();
+        return;
+    }
+
     if (m_SingleValue)
     {
         helper::Throw("Core", "VariableBase", "SetMemorySelection",
diff --git a/source/adios2/engine/bp5/BP5Engine.h b/source/adios2/engine/bp5/BP5Engine.h
index 354ec67949..42464119a3 100644
--- a/source/adios2/engine/bp5/BP5Engine.h
+++ b/source/adios2/engine/bp5/BP5Engine.h
@@ -56,15 +56,33 @@ class BP5Engine
 
     format::BufferSTL m_MetadataIndex;
 
-    /** Positions of flags in Index Table Header that Reader uses */
-    static constexpr size_t m_IndexHeaderSize = 64;
-    static constexpr size_t m_EndianFlagPosition = 36;
-    static constexpr size_t m_BPVersionPosition = 37;
-    static constexpr size_t m_BPMinorVersionPosition = 38;
-    static constexpr size_t m_ActiveFlagPosition = 39;
-    static constexpr size_t m_ColumnMajorFlagPosition = 40;
-    static constexpr size_t m_VersionTagPosition = 0;
-    static constexpr size_t m_VersionTagLength = 32;
+    /** Positions of flags in Index Table Header that Reader uses - MUST BE 64 bytes total */
+    struct BP5IndexTableHeader
+    {
+        char VersionTag[32]; // bytes 0-31
+        uint8_t adiosMajorVersion; // byte 32
+        uint8_t adiosMinorVersion; // byte 33
+        uint8_t adiosPatchVersion; // byte 34
+        uint8_t unused1;        // byte 35, init to zero
+        uint8_t isLittleEndian; // byte 36, boolean
+        uint8_t bpVersion;      // byte 37, 5 here
+        uint8_t bpMinorVersion; // byte 38
+        uint8_t activeFlag; // byte 39
+        char columnMajor;     // byte 40, y or n
+        uint8_t flattenSteps; // byte 41, writer requests all steps flattened to one on read
+        char unused2[22];     // bytes 42-63, init to zero
+    };
+    static constexpr size_t m_IndexHeaderSize = sizeof(BP5IndexTableHeader);
+    static constexpr size_t m_EndianFlagPosition = offsetof(BP5IndexTableHeader, isLittleEndian);
+    static constexpr size_t m_BPVersionPosition = offsetof(BP5IndexTableHeader, bpVersion);
+    static constexpr size_t m_BPMinorVersionPosition =
+        offsetof(BP5IndexTableHeader, bpMinorVersion);
+    static constexpr size_t m_ActiveFlagPosition = offsetof(BP5IndexTableHeader, activeFlag);
+    static constexpr size_t m_ColumnMajorFlagPosition = offsetof(BP5IndexTableHeader, columnMajor);
+    static constexpr size_t m_FlattenStepsPosition = offsetof(BP5IndexTableHeader, flattenSteps);
+    static constexpr size_t m_VersionTagPosition = offsetof(BP5IndexTableHeader, VersionTag);
+    static constexpr size_t m_VersionTagLength = sizeof(BP5IndexTableHeader().VersionTag);
+    static constexpr size_t m_HeaderTailPadding = sizeof(BP5IndexTableHeader().unused2);
 
     static constexpr uint8_t m_BP5MinorVersion = 2;
 
@@ -155,7 +173,9 @@ class BP5Engine
     MACRO(Threads, UInt, unsigned int, 0)                                                          \
     MACRO(UseOneTimeAttributes, Bool, bool, true)                                                  \
     MACRO(RemoteDataPath, String, std::string, "")                                                 \
-    MACRO(MaxOpenFilesAtOnce, UInt, unsigned int, UINT_MAX)
+    MACRO(MaxOpenFilesAtOnce, UInt, unsigned int, UINT_MAX)                                        \
+    MACRO(FlattenSteps, Bool, bool, false)                                                         \
+    MACRO(IgnoreFlattenSteps, Bool, bool, false)
 
     struct BP5Params
     {
diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp
index e8fba5f3f3..5a1a581cf0 100644
--- a/source/adios2/engine/bp5/BP5Reader.cpp
+++ b/source/adios2/engine/bp5/BP5Reader.cpp
@@ -72,7 +72,7 @@ void BP5Reader::InstallMetadataForTimestep(size_t Step)
         size_t ThisMDSize =
             helper::ReadValue(m_Metadata.Data(), Position, m_Minifooter.IsLittleEndian);
         char *ThisMD = m_Metadata.Data() + MDPosition;
-        if (m_OpenMode == Mode::ReadRandomAccess)
+        if ((m_OpenMode == Mode::ReadRandomAccess) || (m_FlattenSteps))
         {
             m_BP5Deserializer->InstallMetaData(ThisMD, ThisMDSize, WriterRank, Step);
         }
@@ -98,7 +98,7 @@ StepStatus BP5Reader::BeginStep(StepMode mode, const float timeoutSeconds)
 {
     PERFSTUBS_SCOPED_TIMER("BP5Reader::BeginStep");
 
-    if (m_OpenMode == Mode::ReadRandomAccess)
+    if (m_OpenMode != Mode::Read)
     {
         helper::Throw("Engine", "BP5Reader", "BeginStep",
                                         "BeginStep called in random access mode");
@@ -184,7 +184,7 @@ size_t BP5Reader::CurrentStep() const { return m_CurrentStep; }
 
 void BP5Reader::EndStep()
 {
-    if (m_OpenMode == Mode::ReadRandomAccess)
+    if (m_OpenMode != Mode::Read)
     {
         helper::Throw("Engine", "BP5Reader", "EndStep",
                                         "EndStep called in random access mode");
@@ -802,8 +802,9 @@ void BP5Reader::UpdateBuffer(const TimePoint &timeoutInstant, const Seconds &pol
         // create the serializer object
         if (!m_BP5Deserializer)
         {
-            m_BP5Deserializer = new format::BP5Deserializer(m_WriterIsRowMajor, m_ReaderIsRowMajor,
-                                                            (m_OpenMode == Mode::ReadRandomAccess));
+            m_BP5Deserializer =
+                new format::BP5Deserializer(m_WriterIsRowMajor, m_ReaderIsRowMajor,
+                                            (m_OpenMode != Mode::Read), (m_FlattenSteps));
             m_BP5Deserializer->m_Engine = this;
         }
     }
@@ -900,7 +901,7 @@ void BP5Reader::UpdateBuffer(const TimePoint &timeoutInstant, const Seconds &pol
 
         m_Comm.Bcast(m_Metadata.Data(), inputSize, 0);
 
-        if (m_OpenMode == Mode::ReadRandomAccess)
+        if ((m_OpenMode == Mode::ReadRandomAccess) || m_FlattenSteps)
         {
             for (size_t Step = 0; Step < m_MetadataIndexTable.size(); Step++)
             {
@@ -977,6 +978,15 @@ size_t BP5Reader::ParseMetadataIndex(format::BufferSTL &bufferSTL, const size_t
         const uint8_t val =
             helper::ReadValue(buffer, position, m_Minifooter.IsLittleEndian);
         m_WriterIsRowMajor = val == 'n';
+
+        position = m_FlattenStepsPosition;
+        const uint8_t flatten_val =
+            helper::ReadValue(buffer, position, m_Minifooter.IsLittleEndian);
+        m_FlattenSteps = (flatten_val != 0);
+
+        if (m_Parameters.IgnoreFlattenSteps)
+            m_FlattenSteps = false;
+
         // move position to first row
         position = m_IndexHeaderSize;
     }
@@ -1306,7 +1316,13 @@ void BP5Reader::FlushProfiler()
     }
 }
 
-size_t BP5Reader::DoSteps() const { return m_StepsCount; }
+// Number of steps reported to the caller: a single step when the steps are
+// being flattened (m_FlattenSteps), otherwise the count held in
+// m_StepsCount.
+size_t BP5Reader::DoSteps() const
+{
+    return m_FlattenSteps ? 1 : m_StepsCount;
+}
 
 void BP5Reader::NotifyEngineNoVarsQuery()
 {
diff --git a/source/adios2/engine/bp5/BP5Reader.h b/source/adios2/engine/bp5/BP5Reader.h
index 5e121cfa2d..6e8508041c 100644
--- a/source/adios2/engine/bp5/BP5Reader.h
+++ b/source/adios2/engine/bp5/BP5Reader.h
@@ -57,6 +57,8 @@ class BP5Reader : public BP5Engine, public Engine
     MinVarInfo *MinBlocksInfo(const VariableBase &, const size_t Step) const;
     bool VarShape(const VariableBase &Var, const size_t Step, Dims &Shape) const;
     bool VariableMinMax(const VariableBase &, const size_t Step, MinMaxStruct &MinMax);
+    const char *VariableExprStr(const VariableBase &Var);
+    void SetFlattenMode(bool flatten) { m_FlattenSteps = flatten; } // overrides m_FlattenSteps
 
 private:
     format::BP5Deserializer *m_BP5Deserializer = nullptr;
@@ -222,6 +224,7 @@ class BP5Reader : public BP5Engine, public Engine
     uint32_t m_WriterColumnMajor = 0;
     bool m_ReaderIsRowMajor = true;
     bool m_WriterIsRowMajor = true;
+    bool m_FlattenSteps = false; // set to true if writer requested all steps be flattened into 1
 
     format::BufferSTL m_MetadataIndex;
     format::BufferSTL m_MetaMetadata;
diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp
index ff1889577e..19c2f1aba0 100644
--- a/source/adios2/engine/bp5/BP5Writer.cpp
+++ b/source/adios2/engine/bp5/BP5Writer.cpp
@@ -507,9 +507,11 @@ void BP5Writer::MarshalAttributes()
 #ifdef ADIOS2_HAVE_DERIVED_VARIABLE
 void BP5Writer::ComputeDerivedVariables()
 {
+    PERFSTUBS_SCOPED_TIMER("BP5Writer::ComputeDerivedVariables");
     auto const &m_VariablesDerived = m_IO.GetDerivedVariables();
     auto const &m_Variables = m_IO.GetVariables();
     // parse all derived variables
+    m_Profiler.Start("DeriveVars");
     for (auto it = m_VariablesDerived.begin(); it != m_VariablesDerived.end(); it++)
     {
         // identify the variables used in the derived variable
@@ -562,6 +564,7 @@ void BP5Writer::ComputeDerivedVariables()
             free(std::get<0>(derivedBlock));
         }
     }
+    m_Profiler.Stop("DeriveVars");
 }
 #endif
 
@@ -1218,9 +1221,12 @@ void BP5Writer::MakeHeader(std::vector &buffer, size_t &position, const st
         helper::CopyToBuffer(buffer, position, version.c_str());
     };
 
-    // auto &buffer = b.m_Buffer;
-    // auto &position = b.m_Position;
-    // auto &absolutePosition = b.m_AbsolutePosition;
+    // The index table header is a fixed 64-byte record that the reader parses
+    // by offset, so verify the struct layout when compiling instead of calling
+    // exit() from library code at run time.
+    static_assert(sizeof(BP5IndexTableHeader) == 64,
+                  "BP5 Index Table Header must be 64 bytes");
+
     if (position > 0)
     {
         helper::Throw(
@@ -1273,11 +1279,7 @@ void BP5Writer::MakeHeader(std::vector &buffer, size_t &position, const st
     lf_CopyVersionChar(majorVersion, buffer, position);
     lf_CopyVersionChar(minorVersion, buffer, position);
     lf_CopyVersionChar(patchVersion, buffer, position);
-    ++position;
-
-    // Note: Reader does process and use bytes 36-38 in
-    // BP4Deserialize.cpp::ParseMetadataIndex().
-    // Order and position must match there.
+    position = m_EndianFlagPosition;
 
     // byte 36: endianness
     if (position != m_EndianFlagPosition)
@@ -1327,8 +1329,9 @@ void BP5Writer::MakeHeader(std::vector &buffer, size_t &position, const st
     const uint8_t columnMajor = (m_IO.m_ArrayOrder == ArrayOrdering::ColumnMajor) ? 'y' : 'n';
     helper::CopyToBuffer(buffer, position, &columnMajor);
 
-    // byte 41-63: unused
-    position += 23;
+    const uint8_t flattenSteps = m_Parameters.FlattenSteps ? 1 : 0; // header field is uint8_t
+    helper::CopyToBuffer(buffer, position, &flattenSteps);
+    position = m_IndexHeaderSize; // remainder of the header is unused
     // absolutePosition = position;
 }
 
diff --git a/source/adios2/toolkit/derived/Function.cpp b/source/adios2/toolkit/derived/Function.cpp
index 441be6a7c6..da64e37f60 100644
--- a/source/adios2/toolkit/derived/Function.cpp
+++ b/source/adios2/toolkit/derived/Function.cpp
@@ -5,6 +5,7 @@
 #include "Function.tcc"
 #include "adios2/common/ADIOSMacros.h"
 #include "adios2/helper/adiosFunctions.h"
+#include 
 #include 
 
 namespace adios2
@@ -14,6 +15,7 @@ namespace derived
 
 DerivedData AddFunc(std::vector inputData, DataType type)
 {
+    PERFSTUBS_SCOPED_TIMER("derived::Function::AddFunc");
     size_t dataSize = std::accumulate(std::begin(inputData[0].Count), std::end(inputData[0].Count),
                                       1, std::multiplies());
 
@@ -31,6 +33,7 @@ DerivedData AddFunc(std::vector inputData, DataType type)
 
 DerivedData MagnitudeFunc(std::vector inputData, DataType type)
 {
+    PERFSTUBS_SCOPED_TIMER("derived::Function::MagnitudeFunc");
     size_t dataSize = std::accumulate(std::begin(inputData[0].Count), std::end(inputData[0].Count),
                                       1, std::multiplies());
 #define declare_type_mag(T)                                                                        \
@@ -147,6 +150,7 @@ float linear_interp(DerivedData input, size_t index, size_t dim)
  */
 DerivedData Curl3DFunc(const std::vector inputData, DataType type)
 {
+    PERFSTUBS_SCOPED_TIMER("derived::Function::Curl3DFunc");
     size_t dataSize = inputData[0].Count[0] * inputData[0].Count[1] * inputData[0].Count[2];
 
     DerivedData curl;
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index f78f554c9d..22fef0463a 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -980,7 +980,13 @@ void BP5Deserializer::InstallMetaData(void *MetadataBlock, size_t BlockLen, size
             {
                 VarRec->FirstTSSeen = Step;
             }
-            if (m_RandomAccessMode && (VarRec->LastTSAdded != Step))
+            if (m_FlattenSteps)
+            {
+                static_cast(VarRec->Variable)->m_AvailableStepsCount = 1;
+                VarRec->LastTSAdded = 0;
+                VarRec->FirstTSSeen = 0;
+            }
+            else if (m_RandomAccessMode && (VarRec->LastTSAdded != Step))
             {
                 static_cast(VarRec->Variable)->m_AvailableStepsCount++;
                 VarRec->LastTSAdded = Step;
@@ -1545,198 +1551,223 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
 {
     std::vector Ret;
     *maxReadSize = 0;
+    size_t StepLoopStart, StepLoopEnd;
 
     for (size_t ReqIndex = 0; ReqIndex < PendingGetRequests.size(); ReqIndex++)
     {
         auto Req = &PendingGetRequests[ReqIndex];
         auto VarRec = (struct BP5VarRec *)Req->VarRec;
         VariableBase *VB = static_cast(VarRec->Variable);
+        if (m_FlattenSteps)
+        {
+            StepLoopStart = 0;
+            StepLoopEnd = m_ControlArray.size();
+        }
+        else
+        {
+            StepLoopStart = Req->Step;
+            StepLoopEnd = Req->Step + 1;
+        }
+
         if (Req->RequestType == Local)
         {
-            const size_t writerCohortSize = WriterCohortSize(Req->Step);
             size_t NodeFirstBlock = 0;
-            for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+            for (size_t Step = StepLoopStart; Step < StepLoopEnd; Step++)
             {
-                MetaArrayRecOperator *writer_meta_base = (MetaArrayRecOperator *)GetMetadataBase(
-                    (struct BP5VarRec *)Req->VarRec, Req->Step, WriterRank);
-                if (!writer_meta_base)
+                const size_t writerCohortSize = WriterCohortSize(Step);
+                for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
                 {
-                    continue; // Not writen on this step
-                }
-                size_t NodeLastBlock = NodeFirstBlock + writer_meta_base->BlockCount - 1;
-                if ((NodeFirstBlock <= Req->BlockID) && (NodeLastBlock >= Req->BlockID))
-                {
-                    // block is here
-                    size_t NeededBlock = Req->BlockID - NodeFirstBlock;
-                    size_t StartDim = NeededBlock * VarRec->DimCount;
-                    ReadRequest RR;
-                    RR.Timestep = Req->Step;
-                    RR.WriterRank = WriterRank;
-                    RR.StartOffset = writer_meta_base->DataBlockLocation[NeededBlock];
-                    if (RR.StartOffset == (size_t)-1)
-                        throw std::runtime_error("No data exists for this variable");
-                    if (Req->MemSpace != MemorySpace::Host)
-                        RR.DirectToAppMemory = false;
-                    else if (VarRec->Operator != NULL)
-                        RR.DirectToAppMemory = false;
-                    else
-                        RR.DirectToAppMemory =
-                            IsContiguousTransfer(Req, &writer_meta_base->Offsets[StartDim],
-                                                 &writer_meta_base->Count[StartDim]);
-                    if (VarRec->Operator)
-                    {
-                        // have to have the whole thing
-                        RR.ReadLength = writer_meta_base->DataBlockSize[NeededBlock];
-                    }
-                    else
+                    MetaArrayRecOperator *writer_meta_base =
+                        (MetaArrayRecOperator *)GetMetadataBase((struct BP5VarRec *)Req->VarRec,
+                                                                Step, WriterRank);
+                    if (!writer_meta_base)
                     {
-                        RR.ReadLength =
-                            helper::GetDataTypeSize(VarRec->Type) *
-                            CalcBlockLength(VarRec->DimCount, &writer_meta_base->Count[StartDim]);
+                        continue; // Not written on this step
                     }
-                    RR.OffsetInBlock = 0;
-                    if (RR.DirectToAppMemory)
+                    size_t NodeLastBlock = NodeFirstBlock + writer_meta_base->BlockCount - 1;
+                    if ((NodeFirstBlock <= Req->BlockID) && (NodeLastBlock >= Req->BlockID))
                     {
-                        RR.DestinationAddr = (char *)Req->Data;
-                        if (Req->Start.size() != 0)
+                        // block is here
+                        size_t NeededBlock = Req->BlockID - NodeFirstBlock;
+                        size_t StartDim = NeededBlock * VarRec->DimCount;
+                        ReadRequest RR;
+                        RR.Timestep = Step; // step actually holding this block
+                        RR.WriterRank = WriterRank;
+                        RR.StartOffset = writer_meta_base->DataBlockLocation[NeededBlock];
+                        if (RR.StartOffset == (size_t)-1)
+                            throw std::runtime_error("No data exists for this variable");
+                        if (Req->MemSpace != MemorySpace::Host)
+                            RR.DirectToAppMemory = false;
+                        else if (VarRec->Operator != NULL)
+                            RR.DirectToAppMemory = false;
+                        else
+                            RR.DirectToAppMemory =
+                                IsContiguousTransfer(Req, &writer_meta_base->Offsets[StartDim],
+                                                     &writer_meta_base->Count[StartDim]);
+                        if (VarRec->Operator)
+                        {
+                            // have to have the whole thing
+                            RR.ReadLength = writer_meta_base->DataBlockSize[NeededBlock];
+                        }
+                        else
                         {
                             RR.ReadLength = helper::GetDataTypeSize(VarRec->Type) *
-                                            CalcBlockLength(VarRec->DimCount, Req->Count.data());
-                            /* DirectToAppMemory handles only 1D, so offset calc
-                             * is 1D only for the moment */
-                            RR.StartOffset += helper::GetDataTypeSize(VarRec->Type) * Req->Start[0];
+                                            CalcBlockLength(VarRec->DimCount,
+                                                            &writer_meta_base->Count[StartDim]);
                         }
-                    }
-                    else
-                    {
-                        RR.DestinationAddr = nullptr;
-                        if (doAllocTempBuffers)
+                        RR.OffsetInBlock = 0;
+                        if (RR.DirectToAppMemory)
+                        {
+                            RR.DestinationAddr = (char *)Req->Data;
+                            if (Req->Start.size() != 0)
+                            {
+                                RR.ReadLength =
+                                    helper::GetDataTypeSize(VarRec->Type) *
+                                    CalcBlockLength(VarRec->DimCount, Req->Count.data());
+                                /* DirectToAppMemory handles only 1D, so offset calc
+                                 * is 1D only for the moment */
+                                RR.StartOffset +=
+                                    helper::GetDataTypeSize(VarRec->Type) * Req->Start[0];
+                            }
+                        }
+                        else
                         {
-                            RR.DestinationAddr = (char *)malloc(RR.ReadLength);
+                            RR.DestinationAddr = nullptr;
+                            if (doAllocTempBuffers)
+                            {
+                                RR.DestinationAddr = (char *)malloc(RR.ReadLength);
+                            }
+                            *maxReadSize =
+                                (*maxReadSize < RR.ReadLength ? RR.ReadLength : *maxReadSize);
                         }
-                        *maxReadSize =
-                            (*maxReadSize < RR.ReadLength ? RR.ReadLength : *maxReadSize);
+                        RR.ReqIndex = ReqIndex;
+                        RR.BlockID = NeededBlock;
+                        Ret.push_back(RR);
+                        break;
                     }
-                    RR.ReqIndex = ReqIndex;
-                    RR.BlockID = NeededBlock;
-                    Ret.push_back(RR);
-                    break;
+                    NodeFirstBlock += writer_meta_base->BlockCount;
                 }
-                NodeFirstBlock += writer_meta_base->BlockCount;
             }
         }
         else
         {
             /* global case */
-            const size_t writerCohortSize = WriterCohortSize(Req->Step);
-            for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+            for (size_t Step = StepLoopStart; Step < StepLoopEnd; Step++)
             {
-                MetaArrayRecOperator *writer_meta_base = (MetaArrayRecOperator *)GetMetadataBase(
-                    (struct BP5VarRec *)Req->VarRec, Req->Step, WriterRank);
-                if (!writer_meta_base)
-                    continue; // Not writen on this step
-
-                for (size_t Block = 0; Block < writer_meta_base->BlockCount; Block++)
+                const size_t writerCohortSize = WriterCohortSize(Step);
+                for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
                 {
-                    std::array intersectionstart;
-                    std::array intersectionend;
-                    std::array intersectioncount;
-
-                    size_t StartDim = Block * VarRec->DimCount;
-                    if (IntersectionStartCount(VarRec->DimCount, Req->Start.data(),
-                                               Req->Count.data(),
-                                               &writer_meta_base->Offsets[StartDim],
-                                               &writer_meta_base->Count[StartDim],
-                                               &intersectionstart[0], &intersectioncount[0]))
+                    MetaArrayRecOperator *writer_meta_base =
+                        (MetaArrayRecOperator *)GetMetadataBase((struct BP5VarRec *)Req->VarRec,
+                                                                Step, WriterRank);
+                    if (!writer_meta_base)
+                        continue; // Not written on this step
+
+                    for (size_t Block = 0; Block < writer_meta_base->BlockCount; Block++)
                     {
-                        if (VarRec->Operator != NULL)
-                        {
-                            // need the whole thing for decompression anyway
-                            ReadRequest RR;
-                            RR.Timestep = Req->Step;
-                            RR.WriterRank = WriterRank;
-                            RR.StartOffset = writer_meta_base->DataBlockLocation[Block];
-                            RR.ReadLength = writer_meta_base->DataBlockSize[Block];
-                            RR.DestinationAddr = nullptr;
-                            if (RR.StartOffset == (size_t)-1)
-                                throw std::runtime_error("No data exists for this variable");
-                            if (doAllocTempBuffers)
-                            {
-                                RR.DestinationAddr = (char *)malloc(RR.ReadLength);
-                            }
-                            *maxReadSize =
-                                (*maxReadSize < RR.ReadLength ? RR.ReadLength : *maxReadSize);
-                            RR.DirectToAppMemory = false;
-                            RR.ReqIndex = ReqIndex;
-                            RR.BlockID = Block;
-                            RR.OffsetInBlock = 0;
-                            Ret.push_back(RR);
-                        }
-                        else
+                        std::array intersectionstart;
+                        std::array intersectionend;
+                        std::array intersectioncount;
+
+                        size_t StartDim = Block * VarRec->DimCount;
+                        if (IntersectionStartCount(VarRec->DimCount, Req->Start.data(),
+                                                   Req->Count.data(),
+                                                   &writer_meta_base->Offsets[StartDim],
+                                                   &writer_meta_base->Count[StartDim],
+                                                   &intersectionstart[0], &intersectioncount[0]))
                         {
-                            for (size_t Dim = 0; Dim < VarRec->DimCount; Dim++)
-                            {
-                                intersectionstart[Dim] -= writer_meta_base->Offsets[StartDim + Dim];
-                            }
-                            size_t StartOffsetInBlock =
-                                VB->m_ElementSize *
-                                LinearIndex(VarRec->DimCount, &writer_meta_base->Count[StartDim],
-                                            &intersectionstart[0], m_ReaderIsRowMajor);
-                            for (size_t Dim = 0; Dim < VarRec->DimCount; Dim++)
-                            {
-                                intersectionend[Dim] =
-                                    intersectionstart[Dim] + intersectioncount[Dim] - 1;
-                            }
-                            size_t EndOffsetInBlock =
-                                VB->m_ElementSize *
-                                (LinearIndex(VarRec->DimCount, &writer_meta_base->Count[StartDim],
-                                             &intersectionend[0], m_ReaderIsRowMajor) +
-                                 1);
-                            ReadRequest RR;
-                            RR.Timestep = Req->Step;
-                            RR.WriterRank = WriterRank;
-                            RR.StartOffset =
-                                writer_meta_base->DataBlockLocation[Block] + StartOffsetInBlock;
-                            if (writer_meta_base->DataBlockLocation[Block] == (size_t)-1)
-                                throw std::runtime_error("No data exists for this variable");
-                            RR.ReadLength = EndOffsetInBlock - StartOffsetInBlock;
-                            if (Req->MemSpace != MemorySpace::Host)
-                                RR.DirectToAppMemory = false;
-                            else
-                                RR.DirectToAppMemory =
-                                    IsContiguousTransfer(Req, &writer_meta_base->Offsets[StartDim],
-                                                         &writer_meta_base->Count[StartDim]);
-                            if (RR.DirectToAppMemory)
-                            {
-                                /*
-                                 * DirectToAppMemory handles only 1D, so offset
-                                 * calc is 1D only for the moment ContigOffset
-                                 * handles the case where our destination is not
-                                 * the start of the destination memory (because
-                                 * some other block filled in that start)
-                                 */
-
-                                ssize_t ContigOffset =
-                                    (writer_meta_base->Offsets[StartDim + 0] - Req->Start[0]) *
-                                    VB->m_ElementSize;
-                                if (ContigOffset < 0)
-                                    ContigOffset = 0;
-                                RR.DestinationAddr = (char *)Req->Data + ContigOffset;
-                            }
-                            else
+                            if (VarRec->Operator != NULL)
                             {
+                                // need the whole thing for decompression anyway
+                                ReadRequest RR;
+                                RR.Timestep = Step;
+                                RR.WriterRank = WriterRank;
+                                RR.StartOffset = writer_meta_base->DataBlockLocation[Block];
+                                RR.ReadLength = writer_meta_base->DataBlockSize[Block];
                                 RR.DestinationAddr = nullptr;
+                                if (RR.StartOffset == (size_t)-1)
+                                    throw std::runtime_error("No data exists for this variable");
                                 if (doAllocTempBuffers)
                                 {
                                     RR.DestinationAddr = (char *)malloc(RR.ReadLength);
                                 }
                                 *maxReadSize =
                                     (*maxReadSize < RR.ReadLength ? RR.ReadLength : *maxReadSize);
+                                RR.DirectToAppMemory = false;
+                                RR.ReqIndex = ReqIndex;
+                                RR.BlockID = Block;
+                                RR.OffsetInBlock = 0;
+                                Ret.push_back(RR);
+                            }
+                            else
+                            {
+                                for (size_t Dim = 0; Dim < VarRec->DimCount; Dim++)
+                                {
+                                    intersectionstart[Dim] -=
+                                        writer_meta_base->Offsets[StartDim + Dim];
+                                }
+                                size_t StartOffsetInBlock =
+                                    VB->m_ElementSize *
+                                    LinearIndex(VarRec->DimCount,
+                                                &writer_meta_base->Count[StartDim],
+                                                &intersectionstart[0], m_ReaderIsRowMajor);
+                                for (size_t Dim = 0; Dim < VarRec->DimCount; Dim++)
+                                {
+                                    intersectionend[Dim] =
+                                        intersectionstart[Dim] + intersectioncount[Dim] - 1;
+                                }
+                                size_t EndOffsetInBlock =
+                                    VB->m_ElementSize *
+                                    (LinearIndex(VarRec->DimCount,
+                                                 &writer_meta_base->Count[StartDim],
+                                                 &intersectionend[0], m_ReaderIsRowMajor) +
+                                     1);
+                                ReadRequest RR;
+                                RR.Timestep = Step;
+                                RR.WriterRank = WriterRank;
+                                RR.StartOffset =
+                                    writer_meta_base->DataBlockLocation[Block] + StartOffsetInBlock;
+                                if (writer_meta_base->DataBlockLocation[Block] == (size_t)-1)
+                                    throw std::runtime_error("No data exists for this variable");
+                                RR.ReadLength = EndOffsetInBlock - StartOffsetInBlock;
+                                if (Req->MemSpace != MemorySpace::Host)
+                                    RR.DirectToAppMemory = false;
+                                else
+                                    RR.DirectToAppMemory = IsContiguousTransfer(
+                                        Req, &writer_meta_base->Offsets[StartDim],
+                                        &writer_meta_base->Count[StartDim]);
+                                if (RR.DirectToAppMemory)
+                                {
+                                    /*
+                                     * DirectToAppMemory handles only 1D, so offset
+                                     * calc is 1D only for the moment ContigOffset
+                                     * handles the case where our destination is not
+                                     * the start of the destination memory (because
+                                     * some other block filled in that start)
+                                     */
+
+                                    ssize_t ContigOffset =
+                                        (writer_meta_base->Offsets[StartDim + 0] - Req->Start[0]) *
+                                        VB->m_ElementSize;
+                                    if (ContigOffset < 0)
+                                        ContigOffset = 0;
+                                    RR.DestinationAddr = (char *)Req->Data + ContigOffset;
+                                }
+                                else
+                                {
+                                    RR.DestinationAddr = nullptr;
+                                    if (doAllocTempBuffers)
+                                    {
+                                        RR.DestinationAddr = (char *)malloc(RR.ReadLength);
+                                    }
+                                    *maxReadSize = (*maxReadSize < RR.ReadLength ? RR.ReadLength
+                                                                                 : *maxReadSize);
+                                }
+                                RR.OffsetInBlock = StartOffsetInBlock;
+                                RR.ReqIndex = ReqIndex;
+                                RR.BlockID = Block;
+                                Ret.push_back(RR);
                             }
-                            RR.OffsetInBlock = StartOffsetInBlock;
-                            RR.ReqIndex = ReqIndex;
-                            RR.BlockID = Block;
-                            Ret.push_back(RR);
                         }
                     }
                 }
@@ -1756,7 +1787,7 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
 
     int ElementSize = ((struct BP5VarRec *)Req.VarRec)->ElementSize;
     MetaArrayRec *writer_meta_base = (MetaArrayRec *)GetMetadataBase(
-        ((struct BP5VarRec *)Req.VarRec), Req.Step, Read.WriterRank);
+        ((struct BP5VarRec *)Req.VarRec), Read.Timestep, Read.WriterRank);
 
     size_t *GlobalDimensions = writer_meta_base->Shape;
     auto DimCount = writer_meta_base->Dims;
@@ -1967,8 +1998,14 @@ int BP5Deserializer::FindOffset(size_t Dims, const size_t *Size, const size_t *I
 
 BP5Deserializer::BP5Deserializer(bool WriterIsRowMajor, bool ReaderIsRowMajor,
                                  bool RandomAccessMode)
+: BP5Deserializer::BP5Deserializer(WriterIsRowMajor, ReaderIsRowMajor, RandomAccessMode, false)
+{
+}
+
+BP5Deserializer::BP5Deserializer(bool WriterIsRowMajor, bool ReaderIsRowMajor,
+                                 bool RandomAccessMode, bool FlattenSteps)
 : m_WriterIsRowMajor{WriterIsRowMajor}, m_ReaderIsRowMajor{ReaderIsRowMajor},
-  m_RandomAccessMode{RandomAccessMode}
+  m_RandomAccessMode{RandomAccessMode}, m_FlattenSteps{FlattenSteps}
 {
     FMContext Tmp = create_local_FMcontext();
     ReaderFFSContext = create_FFSContext_FM(Tmp);
@@ -2065,17 +2102,57 @@ void *BP5Deserializer::GetMetadataBase(BP5VarRec *VarRec, size_t Step, size_t Wr
 
 MinVarInfo *BP5Deserializer::MinBlocksInfo(const VariableBase &Var, size_t RelStep)
 {
+    auto PossiblyAddValueBlocks = [this](MinVarInfo *MV, BP5VarRec *VarRec, size_t &Id,
+                                         const size_t AbsStep) {
+        const size_t writerCohortSize = WriterCohortSize(AbsStep);
+        for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+        {
+            MetaArrayRec *writer_meta_base =
+                (MetaArrayRec *)GetMetadataBase(VarRec, AbsStep, WriterRank);
+            if (writer_meta_base)
+            {
+                MinBlockInfo Blk;
+                Blk.MinMax.Init(VarRec->Type);
+                Blk.WriterID = (int)WriterRank;
+                Blk.BlockID = Id++;
+                Blk.BufferP = writer_meta_base;
+                Blk.Start = NULL;
+                Blk.Count = NULL;
+                if (VarRec->OrigShapeID == ShapeID::LocalValue)
+                {
+                    Blk.Count = (size_t *)1;
+                    Blk.Start = (size_t *)WriterRank;
+                }
+                if (writer_meta_base)
+                {
+                    ApplyElementMinMax(Blk.MinMax, VarRec->Type, writer_meta_base);
+                }
+                MV->BlocksInfo.push_back(Blk);
+            }
+        }
+    };
+
     BP5VarRec *VarRec = LookupVarByKey((void *)&Var);
 
     MinVarInfo *MV = new MinVarInfo((int)VarRec->DimCount, VarRec->GlobalDims);
 
     size_t AbsStep = RelStep;
+    size_t StepLoopStart, StepLoopEnd;
 
     if (m_RandomAccessMode)
     {
         AbsStep = VarRec->AbsStepFromRel[RelStep];
     }
-    const size_t writerCohortSize = WriterCohortSize(AbsStep);
+    if (m_FlattenSteps)
+    {
+        StepLoopStart = 0;
+        StepLoopEnd = m_ControlArray.size();
+    }
+    else
+    {
+        StepLoopStart = AbsStep;
+        StepLoopEnd = AbsStep + 1;
+    }
     size_t Id = 0;
     MV->Step = RelStep;
     MV->Dims = (int)VarRec->DimCount;
@@ -2086,6 +2163,7 @@ MinVarInfo *BP5Deserializer::MinBlocksInfo(const VariableBase &Var, size_t RelSt
     if ((VarRec->OrigShapeID == ShapeID::LocalValue) ||
         (VarRec->OrigShapeID == ShapeID::GlobalValue))
     {
+        const size_t writerCohortSize = WriterCohortSize(AbsStep);
         if (VarRec->OrigShapeID == ShapeID::LocalValue)
         {
             // appear as an array locally
@@ -2099,88 +2177,75 @@ MinVarInfo *BP5Deserializer::MinBlocksInfo(const VariableBase &Var, size_t RelSt
         }
         MV->BlocksInfo.reserve(writerCohortSize);
 
-        for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+        for (size_t Step = StepLoopStart; Step < StepLoopEnd; Step++)
         {
-            MetaArrayRec *writer_meta_base =
-                (MetaArrayRec *)GetMetadataBase(VarRec, AbsStep, WriterRank);
-            if (writer_meta_base)
-            {
-                MinBlockInfo Blk;
-                Blk.MinMax.Init(VarRec->Type);
-                Blk.WriterID = (int)WriterRank;
-                Blk.BlockID = Id++;
-                Blk.BufferP = writer_meta_base;
-                Blk.Start = NULL;
-                Blk.Count = NULL;
-                if (VarRec->OrigShapeID == ShapeID::LocalValue)
-                {
-                    Blk.Count = (size_t *)1;
-                    Blk.Start = (size_t *)WriterRank;
-                }
-                if (writer_meta_base)
-                {
-                    ApplyElementMinMax(Blk.MinMax, VarRec->Type, writer_meta_base);
-                }
-                MV->BlocksInfo.push_back(Blk);
-            }
+            PossiblyAddValueBlocks(MV, VarRec, Id, Step);
         }
         return MV;
     }
-    for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+    for (size_t Step = StepLoopStart; Step < StepLoopEnd; Step++)
     {
-        MetaArrayRec *writer_meta_base =
-            (MetaArrayRec *)GetMetadataBase(VarRec, AbsStep, WriterRank);
-        if (writer_meta_base)
+        const size_t writerCohortSize = WriterCohortSize(Step);
+        for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
         {
-            if (MV->Shape == NULL)
+            MetaArrayRec *writer_meta_base =
+                (MetaArrayRec *)GetMetadataBase(VarRec, Step, WriterRank);
+            if (writer_meta_base)
             {
-                MV->Shape = writer_meta_base->Shape;
+                if (MV->Shape == NULL)
+                {
+                    MV->Shape = writer_meta_base->Shape;
+                }
+                size_t WriterBlockCount =
+                    writer_meta_base->Dims ? writer_meta_base->DBCount / writer_meta_base->Dims : 1;
+                Id += WriterBlockCount;
             }
-            size_t WriterBlockCount =
-                writer_meta_base->Dims ? writer_meta_base->DBCount / writer_meta_base->Dims : 1;
-            Id += WriterBlockCount;
         }
     }
     MV->BlocksInfo.reserve(Id);
 
     Id = 0;
-    for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+    for (size_t Step = StepLoopStart; Step < StepLoopEnd; Step++)
     {
-        MetaArrayRec *writer_meta_base =
-            (MetaArrayRec *)GetMetadataBase(VarRec, AbsStep, WriterRank);
+        const size_t writerCohortSize = WriterCohortSize(Step);
+        for (size_t WriterRank = 0; WriterRank < writerCohortSize; WriterRank++)
+        {
+            MetaArrayRec *writer_meta_base =
+                (MetaArrayRec *)GetMetadataBase(VarRec, Step, WriterRank);
 
-        if (!writer_meta_base)
-            continue;
-        size_t WriterBlockCount = MV->Dims ? writer_meta_base->DBCount / MV->Dims : 1;
-        MinMaxStruct *MMs = NULL;
-        if (VarRec->MinMaxOffset != SIZE_MAX)
-        {
-            MMs = *(MinMaxStruct **)(((char *)writer_meta_base) + VarRec->MinMaxOffset);
-        }
-        for (size_t i = 0; i < WriterBlockCount; i++)
-        {
-            size_t *Offsets = NULL;
-            size_t *Count = NULL;
-            if (writer_meta_base->Offsets)
-                Offsets = writer_meta_base->Offsets + (i * MV->Dims);
-            if (writer_meta_base->Count)
-                Count = writer_meta_base->Count + (i * MV->Dims);
-            MinBlockInfo Blk;
-            Blk.WriterID = (int)WriterRank;
-            Blk.BlockID = Id++;
-            Blk.Start = Offsets;
-            Blk.Count = Count;
-            Blk.MinMax.Init(VarRec->Type);
-            if (MMs)
+            if (!writer_meta_base)
+                continue;
+            size_t WriterBlockCount = MV->Dims ? writer_meta_base->DBCount / MV->Dims : 1;
+            MinMaxStruct *MMs = NULL;
+            if (VarRec->MinMaxOffset != SIZE_MAX)
+            {
+                MMs = *(MinMaxStruct **)(((char *)writer_meta_base) + VarRec->MinMaxOffset);
+            }
+            for (size_t i = 0; i < WriterBlockCount; i++)
             {
+                size_t *Offsets = NULL;
+                size_t *Count = NULL;
+                if (writer_meta_base->Offsets)
+                    Offsets = writer_meta_base->Offsets + (i * MV->Dims);
+                if (writer_meta_base->Count)
+                    Count = writer_meta_base->Count + (i * MV->Dims);
+                MinBlockInfo Blk;
+                Blk.WriterID = (int)WriterRank;
+                Blk.BlockID = Id++;
+                Blk.Start = Offsets;
+                Blk.Count = Count;
+                Blk.MinMax.Init(VarRec->Type);
+                if (MMs)
+                {
 
-                char *BlockMinAddr = (((char *)MMs) + 2 * i * VarRec->ElementSize);
-                char *BlockMaxAddr = (((char *)MMs) + (2 * i + 1) * VarRec->ElementSize);
-                ApplyElementMinMax(Blk.MinMax, VarRec->Type, (void *)BlockMinAddr);
-                ApplyElementMinMax(Blk.MinMax, VarRec->Type, (void *)BlockMaxAddr);
+                    char *BlockMinAddr = (((char *)MMs) + 2 * i * VarRec->ElementSize);
+                    char *BlockMaxAddr = (((char *)MMs) + (2 * i + 1) * VarRec->ElementSize);
+                    ApplyElementMinMax(Blk.MinMax, VarRec->Type, (void *)BlockMinAddr);
+                    ApplyElementMinMax(Blk.MinMax, VarRec->Type, (void *)BlockMaxAddr);
+                }
+                // Blk.BufferP
+                MV->BlocksInfo.push_back(Blk);
             }
-            // Blk.BufferP
-            MV->BlocksInfo.push_back(Blk);
         }
     }
     return MV;
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.h b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
index ad6c53fb13..7c8461aacb 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.h
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
@@ -36,6 +36,8 @@ class BP5Deserializer : virtual public BP5Base
 
 public:
     BP5Deserializer(bool WriterIsRowMajor, bool ReaderIsRowMajor, bool RandomAccessMode = false);
+    BP5Deserializer(bool WriterIsRowMajor, bool ReaderIsRowMajor, bool RandomAccessMode,
+                    bool FlattenSteps);
 
     ~BP5Deserializer();
 
@@ -173,6 +175,7 @@ class BP5Deserializer : virtual public BP5Base
     FFSContext ReaderFFSContext;
 
     const bool m_RandomAccessMode;
+    const bool m_FlattenSteps;
 
     std::vector m_WriterCohortSize; // per step, in random mode
     size_t m_CurrentWriterCohortSize;       // valid in streaming mode
diff --git a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp
index 0cfd7eef26..1002d110ae 100644
--- a/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp
+++ b/source/adios2/toolkit/profiling/iochrono/IOChrono.cpp
@@ -56,6 +56,8 @@ JSONProfiler::JSONProfiler(helper::Comm const &comm) : m_Comm(comm)
     AddTimerWatch("DC_WaitOnAsync2");
     AddTimerWatch("PDW");
 
+    AddTimerWatch("DeriveVars");
+
     m_Profiler.m_Bytes.emplace("buffering", 0);
     AddTimerWatch("DataRead");
     m_Profiler.m_Bytes.emplace("dataread", 0);
diff --git a/source/adios2/toolkit/remote/CMakeLists.txt b/source/adios2/toolkit/remote/CMakeLists.txt
index 2128caa9a9..fdea6ec841 100644
--- a/source/adios2/toolkit/remote/CMakeLists.txt
+++ b/source/adios2/toolkit/remote/CMakeLists.txt
@@ -6,15 +6,11 @@
 if (NOT ADIOS2_USE_PIP)
   add_executable(adios2_remote_server ./remote_server.cpp remote_common.cpp)
 
-  target_link_libraries(adios2_remote_server PUBLIC EVPath::EVPath adios2_core adios2sys
-    PRIVATE $<$:shlwapi>)
+  target_link_libraries(adios2_remote_server
+                        PUBLIC EVPath::EVPath adios2_core adios2sys
+                        PRIVATE adios2::thirdparty::pugixml $<$:shlwapi>)
 
-  get_property(pugixml_headers_path
-    TARGET adios2::thirdparty::pugixml
-    PROPERTY INTERFACE_INCLUDE_DIRECTORIES
-  )
-
-  target_include_directories(adios2_remote_server PRIVATE ${PROJECT_BINARY_DIR} ${pugixml_headers_path})
+  target_include_directories(adios2_remote_server PRIVATE ${PROJECT_BINARY_DIR})
 
   set_property(TARGET adios2_remote_server PROPERTY OUTPUT_NAME adios2_remote_server${ADIOS2_EXECUTABLE_SUFFIX})
   install(TARGETS adios2_remote_server EXPORT adios2
diff --git a/source/adios2/toolkit/transport/file/FileHTTP.cpp b/source/adios2/toolkit/transport/file/FileHTTP.cpp
index 7407cb6ff7..6e52cde493 100644
--- a/source/adios2/toolkit/transport/file/FileHTTP.cpp
+++ b/source/adios2/toolkit/transport/file/FileHTTP.cpp
@@ -8,6 +8,8 @@
  *      Author: Dmitry Ganyushin  ganyushin@gmail.com
  */
 #include "FileHTTP.h"
+#include 
+
 #include 
 #include 
 #include 
diff --git a/source/utils/CMakeLists.txt b/source/utils/CMakeLists.txt
index 30dd48411f..01f5f93c34 100644
--- a/source/utils/CMakeLists.txt
+++ b/source/utils/CMakeLists.txt
@@ -13,17 +13,11 @@ configure_file(
 add_executable(bpls ./bpls/bpls.cpp)
 target_link_libraries(bpls
                       PUBLIC adios2_core adios2sys
-                      PRIVATE $<$:shlwapi>)
-
-get_property(pugixml_headers_path
-  TARGET pugixml
-  PROPERTY INTERFACE_INCLUDE_DIRECTORIES
-)
+                      PRIVATE adios2::thirdparty::pugixml $<$:shlwapi>)
 
 target_include_directories(bpls PRIVATE
   ${PROJECT_BINARY_DIR}
   ${PROJECT_SOURCE_DIR}/bindings/C
-  ${pugixml_headers_path}
 )
 
 set_property(TARGET bpls PROPERTY OUTPUT_NAME bpls${ADIOS2_EXECUTABLE_SUFFIX})
diff --git a/source/utils/bpls/bpls.cpp b/source/utils/bpls/bpls.cpp
index 94dd7805cf..66398ad157 100644
--- a/source/utils/bpls/bpls.cpp
+++ b/source/utils/bpls/bpls.cpp
@@ -95,6 +95,7 @@ bool listmeshes;         // do list meshes too
 bool attrsonly;          // do list attributes only
 bool longopt;            // -l is turned on
 bool timestep;           // read step by step
+bool ignore_flatten;     // don't flatten steps to one
 bool filestream = false; // are we using an engine through FileStream?
 bool noindex;            // do no print array indices with data
 bool printByteAsChar;    // print 8 bit integer arrays as string
@@ -144,6 +145,8 @@ void display_help()
            */
            "  --timestep  | -t           Read content step by step (stream "
            "reading)\n"
+           "  --ignore_flatten           Display steps as written (don't flatten, even if writer "
+           "said to)\n"
            "  --dump      | -d           Dump matched variables/attributes\n"
            "                               To match attributes too, add option "
            "-a\n"
@@ -445,6 +448,7 @@ bool introspectAsBPDir(const std::string &name) noexcept
     char patch = buffer[34];
     bool isBigEndian = static_cast(buffer[36]);
     uint8_t BPVersion = static_cast(buffer[37]);
+    uint8_t flatten = static_cast(buffer[41]);
     bool isActive = false;
     if (BPVersion == 4)
     {
@@ -457,9 +461,9 @@ bool introspectAsBPDir(const std::string &name) noexcept
     {
         uint8_t minversion = static_cast(buffer[38]);
         isActive = static_cast(buffer[39]);
-        printf("ADIOS-BP Version %d.%d %s - ADIOS v%c.%c.%c %s\n", BPVersion, minversion,
+        printf("ADIOS-BP Version %d.%d %s - ADIOS v%c.%c.%c %s%s\n", BPVersion, minversion,
                (isBigEndian ? "Big Endian" : "Little Endian"), major, minor, patch,
-               (isActive ? "- active" : ""));
+               (isActive ? "- active" : ""), (flatten ? "- flatten_steps " : ""));
     }
     else
     {
@@ -617,6 +621,7 @@ int bplsMain(int argc, char *argv[])
     arg.AddBooleanArgument("--noindex", &noindex, " | -y Print data without array indices");
     arg.AddBooleanArgument("-y", &noindex, "");
     arg.AddBooleanArgument("--timestep", &timestep, " | -t Print values of timestep elements");
+    arg.AddBooleanArgument("--ignore_flatten", &ignore_flatten, " Don't flatten steps to one");
     arg.AddBooleanArgument("-t", &timestep, "");
     arg.AddBooleanArgument("--attrs", &listattrs, " | -a List/match attributes too");
     arg.AddBooleanArgument("-a", &listattrs, "");
@@ -765,6 +770,7 @@ void init_globals()
     output_xml = false;
     noindex = false;
     timestep = false;
+    ignore_flatten = false;
     sortnames = false;
     listattrs = false;
     listmeshes = false;
@@ -857,6 +863,8 @@ void printSettings(void)
         printf("      -V : show binary version info of file\n");
     if (timestep)
         printf("      -t : read step-by-step\n");
+    if (ignore_flatten)
+        printf("      --ignore_flatten : ignore FlattenSteps writer specification\n");
 
     if (hidden_attrs)
     {
@@ -1649,6 +1657,11 @@ int doList(std::string path)
         io.SetParameters(p);
     }
 
+    if (ignore_flatten)
+    {
+        io.SetParameters("IgnoreFlattenSteps=on");
+    }
+
     for (auto &engineName : engineList)
     {
         if (verbose > 2)
@@ -1677,7 +1690,13 @@ int doList(std::string path)
             break;
     }
 
-    if (fp != nullptr)
+    if (fp == nullptr)
+    {
+        fprintf(stderr, "\nError: Could not open this file with any ADIOS2 "
+                        "file reading engines\n");
+        return 4;
+    }
+
     {
         //, variables, timesteps, and attributes
         // all parameters are integers,
@@ -1747,12 +1766,6 @@ int doList(std::string path)
         }
         fp->Close();
     }
-    else
-    {
-        fprintf(stderr, "\nError: Could not open this file with any ADIOS2 "
-                        "file reading engines\n");
-        return 4;
-    }
     return 0;
 }
 
@@ -2906,7 +2919,8 @@ bool print_data_xml(const char *s, const size_t length)
     return false;
 }
 
-int print_data(const void *data, int item, DataType adiosvartype, bool allowformat)
+int print_data(const void *data, int item, DataType adiosvartype, bool allowformat,
+               bool char_star_string)
 {
     bool f = format.size() && allowformat;
     const char *fmt = format.c_str();
@@ -2929,9 +2943,15 @@ int print_data(const void *data, int item, DataType adiosvartype, bool allowform
         break;
 
     case DataType::String: {
-        // fprintf(outf, (f ? fmt : "\"%s\""), ((char *)data) + item);
-        const std::string *dataStr = reinterpret_cast(data);
-        fprintf(outf, (f ? fmt : "\"%s\""), dataStr[item].c_str());
+        if (char_star_string)
+        {
+            fprintf(outf, (f ? fmt : "\"%s\""), *((char **)data));
+        }
+        else
+        {
+            const std::string *dataStr = reinterpret_cast(data);
+            fprintf(outf, (f ? fmt : "\"%s\""), dataStr[item].c_str());
+        }
         break;
     }
 
@@ -3380,7 +3400,7 @@ void print_decomp(core::Engine *fp, core::IO *io, core::Variable *variable)
                 if (blocks.size() == 1)
                 {
                     fprintf(outf, " = ");
-                    print_data(blocks[0].BufferP, 0, adiosvartype, true);
+                    print_data(blocks[0].BufferP, 0, adiosvartype, true, /* MBI */ true);
                     fprintf(outf, "\n");
                 }
                 else
@@ -3393,7 +3413,7 @@ void print_decomp(core::Engine *fp, core::IO *io, core::Variable *variable)
                     int col = 0;
                     for (size_t j = 0; j < blocks.size(); j++)
                     {
-                        print_data(blocks[j].BufferP, 0, adiosvartype, true);
+                        print_data(blocks[j].BufferP, 0, adiosvartype, true, /* MBI */ true);
                         ++col;
                         if (j < blocks.size() - 1)
                         {
@@ -3673,9 +3693,12 @@ void print_decomp_singlestep(core::Engine *fp, core::IO *io, core::Variable *
     DataType adiosvartype = variable->m_Type;
     const auto minBlocks = fp->MinBlocksInfo(*variable, fp->CurrentStep());
 
-    std::vector::BPInfo> coreBlocks =
-        fp->BlocksInfo(*variable, fp->CurrentStep());
+    std::vector::BPInfo> coreBlocks;
 
+    if (!minBlocks)
+    {
+        coreBlocks = fp->BlocksInfo(*variable, fp->CurrentStep());
+    }
     if (!minBlocks && coreBlocks.empty())
     {
         return;
diff --git a/source/utils/bpls/bpls.h b/source/utils/bpls/bpls.h
index 8d5c9be035..afd1add048 100644
--- a/source/utils/bpls/bpls.h
+++ b/source/utils/bpls/bpls.h
@@ -88,7 +88,8 @@ bool matchesAMask(const char *name);
 int print_start(const std::string &fnamestr);
 void print_slice_info(core::VariableBase *variable, bool timed, uint64_t *s, uint64_t *c,
                       Dims count);
-int print_data(const void *data, int item, DataType adiosvartypes, bool allowformat);
+int print_data(const void *data, int item, DataType adiosvartypes, bool allowformat,
+               bool char_star_string = false);
 
 /* s is a character array not necessarily null terminated.
  * return false on OK print, true if it not XML (not printed)*/
diff --git a/testing/adios2/engine/bp/CMakeLists.txt b/testing/adios2/engine/bp/CMakeLists.txt
index bf99106bfa..707b9e4ee8 100644
--- a/testing/adios2/engine/bp/CMakeLists.txt
+++ b/testing/adios2/engine/bp/CMakeLists.txt
@@ -91,6 +91,8 @@ set(CTEST_TEST_TIMEOUT 10)
 bp_gtest_add_tests_helper(WriteReadADIOS2 MPI_ALLOW)
 async_gtest_add_tests_helper(WriteReadADIOS2 MPI_ALLOW)
 
+gtest_add_tests_helper(WriteReadFlatten MPI_ONLY BP Engine.BP. .BP5 WORKING_DIRECTORY ${BP5_DIR} EXTRA_ARGS "BP5" )
+
 bp_gtest_add_tests_helper(WriteReadADIOS2fstream MPI_ALLOW)
 bp_gtest_add_tests_helper(WriteReadADIOS2stdio MPI_ALLOW)
 bp_gtest_add_tests_helper(WriteReadAsStreamADIOS2 MPI_ALLOW)
diff --git a/testing/adios2/engine/bp/TestBPWriteReadFlatten.cpp b/testing/adios2/engine/bp/TestBPWriteReadFlatten.cpp
new file mode 100644
index 0000000000..cef5459af9
--- /dev/null
+++ b/testing/adios2/engine/bp/TestBPWriteReadFlatten.cpp
@@ -0,0 +1,1343 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ */
+#include 
+#include 
+
+#include 
+#include  //std::iota
+#include 
+
+#include 
+
+#include 
+
+#include "../SmallTestData.h"
+
+std::string engineName;       // comes from command line
+std::string engineParameters; // comes from command line
+
+class BPWriteReadTestFlatten : public ::testing::Test
+{
+public:
+    BPWriteReadTestFlatten() = default;
+
+    SmallTestData m_TestData;
+};
+
+//******************************************************************************
+// 1D 1x8 test data
+//******************************************************************************
+
+// Flatten BP write and read 1D arrays
+TEST_F(BPWriteReadTestFlatten, FlattenBPWriteRead1D8)
+{
+    // Each process would write a 1x8 array and all processes would
+    // form a mpiSize * Nx 1D array
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of elements written by each process
+    const size_t Nx = 8;
+
+#if ADIOS2_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+    const std::string fname("FlattenBPWriteRead1D8_MPI.bp");
+#else
+    const std::string fname("FlattenBPWriteRead1D8.bp");
+#endif
+
+    // Write test data using BP
+
+#if ADIOS2_USE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+    adios2::ADIOS adios;
+#endif
+    {
+        adios2::IO io = adios.DeclareIO("TestIO");
+
+        // Declare 1D variables (NumOfProcesses * Nx)
+        // The local process' part (start, count) can be defined now or later
+        // before Put().
+        {
+            const adios2::Dims shape{static_cast(Nx * mpiSize)};
+            const adios2::Dims start{static_cast(Nx * mpiRank)};
+            const adios2::Dims count{Nx};
+
+            auto var_char = io.DefineVariable("ch", shape, start, count);
+            EXPECT_TRUE(var_char);
+            auto var_iString = io.DefineVariable("iString");
+            EXPECT_TRUE(var_iString);
+            auto var_i8 = io.DefineVariable("i8", shape, start, count);
+            EXPECT_TRUE(var_i8);
+            auto var_i16 = io.DefineVariable("i16", shape, start, count);
+            EXPECT_TRUE(var_i16);
+            auto var_i32 = io.DefineVariable("i32", shape, start, count);
+            EXPECT_TRUE(var_i32);
+            auto var_i64 = io.DefineVariable("i64", shape, start, count);
+            EXPECT_TRUE(var_i64);
+            auto var_u8 = io.DefineVariable("u8", shape, start, count);
+            EXPECT_TRUE(var_u8);
+            auto var_u16 = io.DefineVariable("u16", shape, start, count);
+            EXPECT_TRUE(var_u16);
+            auto var_u32 = io.DefineVariable("u32", shape, start, count);
+            EXPECT_TRUE(var_u32);
+            auto var_u64 = io.DefineVariable("u64", shape, start, count);
+            EXPECT_TRUE(var_u64);
+            auto var_r32 = io.DefineVariable("r32", shape, start, count);
+            EXPECT_TRUE(var_r32);
+            auto var_r64 = io.DefineVariable("r64", shape, start, count);
+            EXPECT_TRUE(var_r64);
+        }
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        else
+        {
+            // Create the BP Engine
+            io.SetEngine("BPFile");
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        io.SetParameters("FlattenSteps=on");
+        io.AddTransport("file");
+
+        adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        EXPECT_EQ(bpWriter.OpenMode(), adios2::Mode::Write);
+
+        for (size_t step = 0; step < (size_t)mpiSize; ++step)
+        {
+            // Generate test data for each process uniquely (all as if for step 0)
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast(0), mpiRank, mpiSize);
+
+            // Retrieve the variables that previously went out of scope
+            auto var_char = io.InquireVariable("ch");
+            auto var_iString = io.InquireVariable("iString");
+            auto var_i8 = io.InquireVariable("i8");
+            auto var_i16 = io.InquireVariable("i16");
+            auto var_i32 = io.InquireVariable("i32");
+            auto var_i64 = io.InquireVariable("i64");
+            auto var_u8 = io.InquireVariable("u8");
+            auto var_u16 = io.InquireVariable("u16");
+            auto var_u32 = io.InquireVariable("u32");
+            auto var_u64 = io.InquireVariable("u64");
+            auto var_r32 = io.InquireVariable("r32");
+            auto var_r64 = io.InquireVariable("r64");
+
+            // Make a 1D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::Box sel({mpiRank * Nx}, {Nx});
+
+            var_char.SetSelection(sel);
+            EXPECT_THROW(var_iString.SetSelection(sel), std::invalid_argument);
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
+            // Write each one
+            // fill in the variable with values from starting index to
+            // starting index + count
+            bpWriter.BeginStep();
+
+            if (step == (size_t)mpiRank)
+            {
+                bpWriter.Put(var_char, currentTestData.CHAR.data());
+                bpWriter.Put(var_iString, currentTestData.S1);
+                bpWriter.Put(var_i8, currentTestData.I8.data());
+                bpWriter.Put(var_i16, currentTestData.I16.data());
+                bpWriter.Put(var_i32, currentTestData.I32.data());
+                bpWriter.Put(var_i64, currentTestData.I64.data());
+                bpWriter.Put(var_u8, currentTestData.U8.data());
+                bpWriter.Put(var_u16, currentTestData.U16.data());
+                bpWriter.Put(var_u32, currentTestData.U32.data());
+                bpWriter.Put(var_u64, currentTestData.U64.data());
+                bpWriter.Put(var_r32, currentTestData.R32.data());
+                bpWriter.Put(var_r64, currentTestData.R64.data());
+            }
+            bpWriter.EndStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO io = adios.DeclareIO("ReadIO");
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        adios2::Engine bpReader = io.Open(fname, adios2::Mode::ReadRandomAccess);
+
+        EXPECT_EQ(bpReader.Steps(), 1);
+
+        auto var_char = io.InquireVariable("ch");
+        EXPECT_TRUE(var_char);
+        ASSERT_EQ(var_char.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_char.Steps(), 1);
+        ASSERT_EQ(var_char.Shape()[0], mpiSize * Nx);
+
+        auto var_iString = io.InquireVariable("iString");
+        EXPECT_TRUE(var_iString);
+        ASSERT_EQ(var_iString.Shape().size(), 0);
+        ASSERT_EQ(var_iString.Steps(), 1);
+
+        auto var_i8 = io.InquireVariable("i8");
+        EXPECT_TRUE(var_i8);
+        ASSERT_EQ(var_i8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8.Steps(), 1);
+        ASSERT_EQ(var_i8.Shape()[0], mpiSize * Nx);
+
+        auto var_i16 = io.InquireVariable("i16");
+        EXPECT_TRUE(var_i16);
+        ASSERT_EQ(var_i16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16.Steps(), 1);
+        ASSERT_EQ(var_i16.Shape()[0], mpiSize * Nx);
+
+        auto var_i32 = io.InquireVariable("i32");
+        EXPECT_TRUE(var_i32);
+        ASSERT_EQ(var_i32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32.Steps(), 1);
+        ASSERT_EQ(var_i32.Shape()[0], mpiSize * Nx);
+
+        auto var_i64 = io.InquireVariable("i64");
+        EXPECT_TRUE(var_i64);
+        ASSERT_EQ(var_i64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64.Steps(), 1);
+        ASSERT_EQ(var_i64.Shape()[0], mpiSize * Nx);
+
+        auto var_u8 = io.InquireVariable("u8");
+        EXPECT_TRUE(var_u8);
+        ASSERT_EQ(var_u8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8.Steps(), 1);
+        ASSERT_EQ(var_u8.Shape()[0], mpiSize * Nx);
+
+        auto var_u16 = io.InquireVariable("u16");
+        EXPECT_TRUE(var_u16);
+        ASSERT_EQ(var_u16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16.Steps(), 1);
+        ASSERT_EQ(var_u16.Shape()[0], mpiSize * Nx);
+
+        auto var_u32 = io.InquireVariable("u32");
+        EXPECT_TRUE(var_u32);
+        ASSERT_EQ(var_u32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32.Steps(), 1);
+        ASSERT_EQ(var_u32.Shape()[0], mpiSize * Nx);
+
+        auto var_u64 = io.InquireVariable("u64");
+        EXPECT_TRUE(var_u64);
+        ASSERT_EQ(var_u64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64.Steps(), 1);
+        ASSERT_EQ(var_u64.Shape()[0], mpiSize * Nx);
+
+        auto var_r32 = io.InquireVariable("r32");
+        EXPECT_TRUE(var_r32);
+        ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32.Steps(), 1);
+        ASSERT_EQ(var_r32.Shape()[0], mpiSize * Nx);
+
+        auto var_r64 = io.InquireVariable("r64");
+        EXPECT_TRUE(var_r64);
+        ASSERT_EQ(var_r64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64.Steps(), 1);
+        ASSERT_EQ(var_r64.Shape()[0], mpiSize * Nx);
+
+        // TODO: other types
+
+        SmallTestData testData;
+
+        std::string IString;
+        std::array I8;
+        std::array I16;
+        std::array I32;
+        std::array I64;
+        std::array U8;
+        std::array U16;
+        std::array U32;
+        std::array U64;
+        std::array R32;
+        std::array R64;
+        std::array CHAR;
+
+        const adios2::Dims start{mpiRank * Nx};
+        const adios2::Dims count{Nx};
+
+        const adios2::Box sel(start, count);
+
+        for (size_t t = 0; t < 1; ++t)
+        {
+            var_char.SetSelection(sel);
+
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
+            // default step selection should be 0, 1, so no need for that
+
+            // Generate test data for each rank uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast(0), mpiRank, mpiSize);
+
+            bpReader.Get(var_char, CHAR.data());
+            bpReader.Get(var_iString, IString);
+            bpReader.Get(var_i8, I8.data());
+            bpReader.Get(var_i16, I16.data());
+            bpReader.Get(var_i32, I32.data());
+            bpReader.Get(var_i64, I64.data());
+            bpReader.Get(var_u8, U8.data());
+            bpReader.Get(var_u16, U16.data());
+            bpReader.Get(var_u32, U32.data());
+            bpReader.Get(var_u64, U64.data());
+            bpReader.Get(var_r32, R32.data());
+            bpReader.Get(var_r64, R64.data());
+
+            bpReader.PerformGets();
+
+            EXPECT_EQ(IString, currentTestData.S1) << "rank=" << mpiRank;
+
+            for (size_t i = 0; i < Nx; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                // EXPECT_EQ(TF[i], currentTestData.TF[i]) << msg;
+                EXPECT_EQ(CHAR[i], currentTestData.CHAR[i]) << msg;
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
+            }
+        }
+        bpReader.Close();
+    }
+}
+
+//******************************************************************************
+// 2D 2x4 test data
+//******************************************************************************
+
+// ADIOS2 BP write and read 2D array
+TEST_F(BPWriteReadTestFlatten, FlattenBPWriteRead2D2x4)
+{
+    // Each process would write a 2x4 array and all processes would
+    // form a 2D 2 * (numberOfProcess*Nx) matrix where Nx is 4 here
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of columns
+    const std::size_t Nx = 4;
+
+    // Number of rows
+    const std::size_t Ny = 2;
+
+#if ADIOS2_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+    const std::string fname("FlattenBPWriteRead2D2x4Test_MPI.bp");
+#else
+    const std::string fname("FlattenBPWriteRead2D2x4Test.bp");
+#endif
+
+    // Write test data using ADIOS2
+
+#if ADIOS2_USE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+    adios2::ADIOS adios;
+#endif
+    {
+        adios2::IO io = adios.DeclareIO("TestIO");
+
+        // Declare 2D variables (Ny * (NumOfProcesses * Nx))
+        // The local process' part (start, count) can be defined now or later
+        // before Put().
+        {
+            const adios2::Dims shape{Ny, static_cast(Nx * mpiSize)};
+            const adios2::Dims start{0, static_cast(mpiRank * Nx)};
+            const adios2::Dims count{Ny, Nx};
+
+            auto var_iString = io.DefineVariable("iString");
+            EXPECT_TRUE(var_iString);
+            auto var_i8 = io.DefineVariable("i8", shape, start, count);
+            EXPECT_TRUE(var_i8);
+            auto var_i16 = io.DefineVariable("i16", shape, start, count);
+            EXPECT_TRUE(var_i16);
+            auto var_i32 = io.DefineVariable("i32", shape, start, count);
+            EXPECT_TRUE(var_i32);
+            auto var_i64 = io.DefineVariable("i64", shape, start, count);
+            EXPECT_TRUE(var_i64);
+            auto var_u8 = io.DefineVariable("u8", shape, start, count);
+            EXPECT_TRUE(var_u8);
+            auto var_u16 = io.DefineVariable("u16", shape, start, count);
+            EXPECT_TRUE(var_u16);
+            auto var_u32 = io.DefineVariable("u32", shape, start, count);
+            EXPECT_TRUE(var_u32);
+            auto var_u64 = io.DefineVariable("u64", shape, start, count);
+            EXPECT_TRUE(var_u64);
+            auto var_r32 = io.DefineVariable("r32", shape, start, count);
+            EXPECT_TRUE(var_r32);
+            auto var_r64 = io.DefineVariable("r64", shape, start, count);
+            EXPECT_TRUE(var_r64);
+        }
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        else
+        {
+            // Create the BP Engine
+            io.SetEngine("BPFile");
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+        io.AddTransport("file");
+
+        io.SetParameters("FlattenSteps=on");
+        adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < (size_t)mpiSize; ++step)
+        {
+            // Generate test data for each process uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast(0), mpiRank, mpiSize);
+
+            // Retrieve the variables that previously went out of scope
+            auto var_iString = io.InquireVariable("iString");
+            auto var_i8 = io.InquireVariable("i8");
+            auto var_i16 = io.InquireVariable("i16");
+            auto var_i32 = io.InquireVariable("i32");
+            auto var_i64 = io.InquireVariable("i64");
+            auto var_u8 = io.InquireVariable("u8");
+            auto var_u16 = io.InquireVariable("u16");
+            auto var_u32 = io.InquireVariable("u32");
+            auto var_u64 = io.InquireVariable("u64");
+            auto var_r32 = io.InquireVariable("r32");
+            auto var_r64 = io.InquireVariable("r64");
+
+            // Make a 2D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::Box sel({0, static_cast(mpiRank * Nx)}, {Ny, Nx});
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
+            // Write each one
+            // fill in the variable with values from starting index to
+            // starting index + count
+            bpWriter.BeginStep();
+            if (step == (size_t)mpiRank)
+            {
+                bpWriter.Put(var_iString, currentTestData.S1);
+                bpWriter.Put(var_i8, currentTestData.I8.data());
+                bpWriter.Put(var_i16, currentTestData.I16.data());
+                bpWriter.Put(var_i32, currentTestData.I32.data());
+                bpWriter.Put(var_i64, currentTestData.I64.data());
+                bpWriter.Put(var_u8, currentTestData.U8.data());
+                bpWriter.Put(var_u16, currentTestData.U16.data());
+                bpWriter.Put(var_u32, currentTestData.U32.data());
+                bpWriter.Put(var_u64, currentTestData.U64.data());
+                bpWriter.Put(var_r32, currentTestData.R32.data());
+                bpWriter.Put(var_r64, currentTestData.R64.data());
+            }
+            bpWriter.EndStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO io = adios.DeclareIO("ReadIO");
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        adios2::Engine bpReader = io.Open(fname, adios2::Mode::ReadRandomAccess);
+
+        EXPECT_EQ(bpReader.Steps(), 1);
+        auto var_iString = io.InquireVariable("iString");
+        EXPECT_TRUE(var_iString);
+        ASSERT_EQ(var_iString.Shape().size(), 0);
+        ASSERT_EQ(var_iString.Steps(), 1);
+
+        auto var_i8 = io.InquireVariable("i8");
+        EXPECT_TRUE(var_i8);
+        ASSERT_EQ(var_i8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8.Steps(), 1);
+        ASSERT_EQ(var_i8.Shape()[0], Ny);
+        ASSERT_EQ(var_i8.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_i16 = io.InquireVariable("i16");
+        EXPECT_TRUE(var_i16);
+        ASSERT_EQ(var_i16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16.Steps(), 1);
+        ASSERT_EQ(var_i16.Shape()[0], Ny);
+        ASSERT_EQ(var_i16.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_i32 = io.InquireVariable("i32");
+        EXPECT_TRUE(var_i32);
+        ASSERT_EQ(var_i32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32.Steps(), 1);
+        ASSERT_EQ(var_i32.Shape()[0], Ny);
+        ASSERT_EQ(var_i32.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_i64 = io.InquireVariable("i64");
+        EXPECT_TRUE(var_i64);
+        ASSERT_EQ(var_i64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64.Steps(), 1);
+        ASSERT_EQ(var_i64.Shape()[0], Ny);
+        ASSERT_EQ(var_i64.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_u8 = io.InquireVariable("u8");
+        EXPECT_TRUE(var_u8);
+        ASSERT_EQ(var_u8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8.Steps(), 1);
+        ASSERT_EQ(var_u8.Shape()[0], Ny);
+        ASSERT_EQ(var_u8.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_u16 = io.InquireVariable("u16");
+        EXPECT_TRUE(var_u16);
+        ASSERT_EQ(var_u16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16.Steps(), 1);
+        ASSERT_EQ(var_u16.Shape()[0], Ny);
+        ASSERT_EQ(var_u16.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_u32 = io.InquireVariable("u32");
+        EXPECT_TRUE(var_u32);
+        ASSERT_EQ(var_u32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32.Steps(), 1);
+        ASSERT_EQ(var_u32.Shape()[0], Ny);
+        ASSERT_EQ(var_u32.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_u64 = io.InquireVariable("u64");
+        EXPECT_TRUE(var_u64);
+        ASSERT_EQ(var_u64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64.Steps(), 1);
+        ASSERT_EQ(var_u64.Shape()[0], Ny);
+        ASSERT_EQ(var_u64.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_r32 = io.InquireVariable("r32");
+        EXPECT_TRUE(var_r32);
+        ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32.Steps(), 1);
+        ASSERT_EQ(var_r32.Shape()[0], Ny);
+        ASSERT_EQ(var_r32.Shape()[1], static_cast(mpiSize * Nx));
+
+        auto var_r64 = io.InquireVariable("r64");
+        EXPECT_TRUE(var_r64);
+        ASSERT_EQ(var_r64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64.Steps(), 1);
+        ASSERT_EQ(var_r64.Shape()[0], Ny);
+        ASSERT_EQ(var_r64.Shape()[1], static_cast(mpiSize * Nx));
+
+        std::string IString;
+        std::array I8;
+        std::array I16;
+        std::array I32;
+        std::array I64;
+        std::array U8;
+        std::array U16;
+        std::array U32;
+        std::array U64;
+        std::array R32;
+        std::array R64;
+
+        const adios2::Dims start{0, static_cast(mpiRank * Nx)};
+        const adios2::Dims count{Ny, Nx};
+
+        const adios2::Box sel(start, count);
+
+        var_i8.SetSelection(sel);
+        var_i16.SetSelection(sel);
+        var_i32.SetSelection(sel);
+        var_i64.SetSelection(sel);
+
+        var_u8.SetSelection(sel);
+        var_u16.SetSelection(sel);
+        var_u32.SetSelection(sel);
+        var_u64.SetSelection(sel);
+
+        var_r32.SetSelection(sel);
+        var_r64.SetSelection(sel);
+
+        for (size_t t = 0; t < 1; ++t)
+        {
+            var_i8.SetStepSelection({t, 1});
+            var_i16.SetStepSelection({t, 1});
+            var_i32.SetStepSelection({t, 1});
+            var_i64.SetStepSelection({t, 1});
+
+            var_u8.SetStepSelection({t, 1});
+            var_u16.SetStepSelection({t, 1});
+            var_u32.SetStepSelection({t, 1});
+            var_u64.SetStepSelection({t, 1});
+
+            var_r32.SetStepSelection({t, 1});
+            var_r64.SetStepSelection({t, 1});
+
+            bpReader.Get(var_iString, IString);
+
+            bpReader.Get(var_i8, I8.data());
+            bpReader.Get(var_i16, I16.data());
+            bpReader.Get(var_i32, I32.data());
+            bpReader.Get(var_i64, I64.data());
+
+            bpReader.Get(var_u8, U8.data());
+            bpReader.Get(var_u16, U16.data());
+            bpReader.Get(var_u32, U32.data());
+            bpReader.Get(var_u64, U64.data());
+
+            bpReader.Get(var_r32, R32.data());
+            bpReader.Get(var_r64, R64.data());
+
+            bpReader.PerformGets();
+            // Generate test data for each rank uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast(0), mpiRank, mpiSize);
+
+            EXPECT_EQ(IString, currentTestData.S1);
+
+            for (size_t i = 0; i < Nx * Ny; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
+            }
+        }
+        bpReader.Close();
+    }
+}
+
+//******************************************************************************
+// 2D 4x2 test data
+//******************************************************************************
+
+TEST_F(BPWriteReadTestFlatten, FlattenBPWriteRead2D4x2)
+{
+    // Each process writes a 4x2 (Ny x Nx) block and all processes together
+    // form a 2D 4 * (NumberOfProcess * Nx) matrix where Nx is 2 here
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of columns contributed by each process
+    const std::size_t Nx = 2;
+    // Number of rows
+    const std::size_t Ny = 4;
+
+#if ADIOS2_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+    const std::string fname("FlattenBPWriteRead2D4x2Test_MPI.bp");
+#else
+    const std::string fname("FlattenBPWriteRead2D4x2Test.bp");
+#endif
+
+    // Write test data using ADIOS2
+
+#if ADIOS2_USE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+    adios2::ADIOS adios;
+#endif
+    {
+        adios2::IO io = adios.DeclareIO("TestIO");
+
+        // Declare 2D variables (4 * (NumberOfProcess * Nx))
+        // The local process' part (start, count) can be defined now or later
+        // before Put().
+        {
+            adios2::Dims shape{static_cast<size_t>(Ny),
+                               static_cast<size_t>(mpiSize * Nx)};
+            adios2::Dims start{static_cast<size_t>(0),
+                               static_cast<size_t>(mpiRank * Nx)};
+            adios2::Dims count{static_cast<size_t>(Ny), static_cast<size_t>(Nx)};
+            auto var_i8 = io.DefineVariable<int8_t>("i8", shape, start, count);
+            EXPECT_TRUE(var_i8);
+            auto var_i16 = io.DefineVariable<int16_t>("i16", shape, start, count);
+            EXPECT_TRUE(var_i16);
+            auto var_i32 = io.DefineVariable<int32_t>("i32", shape, start, count);
+            EXPECT_TRUE(var_i32);
+            auto var_i64 = io.DefineVariable<int64_t>("i64", shape, start, count);
+            EXPECT_TRUE(var_i64);
+            auto var_u8 = io.DefineVariable<uint8_t>("u8", shape, start, count);
+            EXPECT_TRUE(var_u8);
+            auto var_u16 = io.DefineVariable<uint16_t>("u16", shape, start, count);
+            EXPECT_TRUE(var_u16);
+            auto var_u32 = io.DefineVariable<uint32_t>("u32", shape, start, count);
+            EXPECT_TRUE(var_u32);
+            auto var_u64 = io.DefineVariable<uint64_t>("u64", shape, start, count);
+            EXPECT_TRUE(var_u64);
+            auto var_r32 = io.DefineVariable<float>("r32", shape, start, count);
+            EXPECT_TRUE(var_r32);
+            auto var_r64 = io.DefineVariable<double>("r64", shape, start, count);
+            EXPECT_TRUE(var_r64);
+        }
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        else
+        {
+            // Create the BP Engine
+            io.SetEngine("BPFile");
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        io.AddTransport("file");
+
+        io.SetParameters("FlattenSteps=on");
+        adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < (size_t)mpiSize; ++step)
+        {
+            // Generate test data for each process uniquely (same data for every step)
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast<int>(0), mpiRank, mpiSize);
+
+            // Retrieve the variables that previously went out of scope
+            auto var_i8 = io.InquireVariable<int8_t>("i8");
+            auto var_i16 = io.InquireVariable<int16_t>("i16");
+            auto var_i32 = io.InquireVariable<int32_t>("i32");
+            auto var_i64 = io.InquireVariable<int64_t>("i64");
+            auto var_u8 = io.InquireVariable<uint8_t>("u8");
+            auto var_u16 = io.InquireVariable<uint16_t>("u16");
+            auto var_u32 = io.InquireVariable<uint32_t>("u32");
+            auto var_u64 = io.InquireVariable<uint64_t>("u64");
+            auto var_r32 = io.InquireVariable<float>("r32");
+            auto var_r64 = io.InquireVariable<double>("r64");
+
+            // Make a 2D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::Box<adios2::Dims> sel({0, static_cast<size_t>(mpiRank * Nx)}, {Ny, Nx});
+            var_i8.SetSelection(sel);
+            var_i16.SetSelection(sel);
+            var_i32.SetSelection(sel);
+            var_i64.SetSelection(sel);
+            var_u8.SetSelection(sel);
+            var_u16.SetSelection(sel);
+            var_u32.SetSelection(sel);
+            var_u64.SetSelection(sel);
+            var_r32.SetSelection(sel);
+            var_r64.SetSelection(sel);
+
+            // Every rank opens and closes every step, but a rank only puts its
+            // block in the step whose index equals its rank; the reader below
+            // expects to find all of these blocks in a single step
+            bpWriter.BeginStep();
+            if (step == (size_t)mpiRank)
+            {
+                bpWriter.Put(var_i8, currentTestData.I8.data());
+                bpWriter.Put(var_i16, currentTestData.I16.data());
+                bpWriter.Put(var_i32, currentTestData.I32.data());
+                bpWriter.Put(var_i64, currentTestData.I64.data());
+                bpWriter.Put(var_u8, currentTestData.U8.data());
+                bpWriter.Put(var_u16, currentTestData.U16.data());
+                bpWriter.Put(var_u32, currentTestData.U32.data());
+                bpWriter.Put(var_u64, currentTestData.U64.data());
+                bpWriter.Put(var_r32, currentTestData.R32.data());
+                bpWriter.Put(var_r64, currentTestData.R64.data());
+            }
+            bpWriter.EndStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO io = adios.DeclareIO("ReadIO");
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        adios2::Engine bpReader = io.Open(fname, adios2::Mode::ReadRandomAccess);
+
+        EXPECT_EQ(bpReader.Steps(), 1);
+
+        auto var_i8 = io.InquireVariable<int8_t>("i8");
+        EXPECT_TRUE(var_i8);
+        ASSERT_EQ(var_i8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i8.Steps(), 1);
+        ASSERT_EQ(var_i8.Shape()[0], Ny);
+        ASSERT_EQ(var_i8.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i16 = io.InquireVariable<int16_t>("i16");
+        EXPECT_TRUE(var_i16);
+        ASSERT_EQ(var_i16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i16.Steps(), 1);
+        ASSERT_EQ(var_i16.Shape()[0], Ny);
+        ASSERT_EQ(var_i16.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i32 = io.InquireVariable<int32_t>("i32");
+        EXPECT_TRUE(var_i32);
+        ASSERT_EQ(var_i32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i32.Steps(), 1);
+        ASSERT_EQ(var_i32.Shape()[0], Ny);
+        ASSERT_EQ(var_i32.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_i64 = io.InquireVariable<int64_t>("i64");
+        EXPECT_TRUE(var_i64);
+        ASSERT_EQ(var_i64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_i64.Steps(), 1);
+        ASSERT_EQ(var_i64.Shape()[0], Ny);
+        ASSERT_EQ(var_i64.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u8 = io.InquireVariable<uint8_t>("u8");
+        EXPECT_TRUE(var_u8);
+        ASSERT_EQ(var_u8.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u8.Steps(), 1);
+        ASSERT_EQ(var_u8.Shape()[0], Ny);
+        ASSERT_EQ(var_u8.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u16 = io.InquireVariable<uint16_t>("u16");
+        EXPECT_TRUE(var_u16);
+        ASSERT_EQ(var_u16.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u16.Steps(), 1);
+        ASSERT_EQ(var_u16.Shape()[0], Ny);
+        ASSERT_EQ(var_u16.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u32 = io.InquireVariable<uint32_t>("u32");
+        EXPECT_TRUE(var_u32);
+        ASSERT_EQ(var_u32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u32.Steps(), 1);
+        ASSERT_EQ(var_u32.Shape()[0], Ny);
+        ASSERT_EQ(var_u32.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_u64 = io.InquireVariable<uint64_t>("u64");
+        EXPECT_TRUE(var_u64);
+        ASSERT_EQ(var_u64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_u64.Steps(), 1);
+        ASSERT_EQ(var_u64.Shape()[0], Ny);
+        ASSERT_EQ(var_u64.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r32 = io.InquireVariable<float>("r32");
+        EXPECT_TRUE(var_r32);
+        ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r32.Steps(), 1);
+        ASSERT_EQ(var_r32.Shape()[0], Ny);
+        ASSERT_EQ(var_r32.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        auto var_r64 = io.InquireVariable<double>("r64");
+        EXPECT_TRUE(var_r64);
+        ASSERT_EQ(var_r64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64.Steps(), 1);
+        ASSERT_EQ(var_r64.Shape()[0], Ny);
+        ASSERT_EQ(var_r64.Shape()[1], static_cast<size_t>(mpiSize * Nx));
+
+        // If the size of the array is smaller than the data
+        // the result is weird... double and uint64_t would get
+        // completely garbage data
+        std::array<int8_t, Nx * Ny> I8;
+        std::array<int16_t, Nx * Ny> I16;
+        std::array<int32_t, Nx * Ny> I32;
+        std::array<int64_t, Nx * Ny> I64;
+        std::array<uint8_t, Nx * Ny> U8;
+        std::array<uint16_t, Nx * Ny> U16;
+        std::array<uint32_t, Nx * Ny> U32;
+        std::array<uint64_t, Nx * Ny> U64;
+        std::array<float, Nx * Ny> R32;
+        std::array<double, Nx * Ny> R64;
+
+        const adios2::Dims start{0, static_cast<size_t>(mpiRank * Nx)};
+        const adios2::Dims count{Ny, Nx};
+
+        const adios2::Box<adios2::Dims> sel(start, count);
+
+        var_i8.SetSelection(sel);
+        var_i16.SetSelection(sel);
+        var_i32.SetSelection(sel);
+        var_i64.SetSelection(sel);
+
+        var_u8.SetSelection(sel);
+        var_u16.SetSelection(sel);
+        var_u32.SetSelection(sel);
+        var_u64.SetSelection(sel);
+
+        var_r32.SetSelection(sel);
+        var_r64.SetSelection(sel);
+
+        for (size_t t = 0; t < 1; ++t)
+        {
+            var_i8.SetStepSelection({t, 1});
+            var_i16.SetStepSelection({t, 1});
+            var_i32.SetStepSelection({t, 1});
+            var_i64.SetStepSelection({t, 1});
+
+            var_u8.SetStepSelection({t, 1});
+            var_u16.SetStepSelection({t, 1});
+            var_u32.SetStepSelection({t, 1});
+            var_u64.SetStepSelection({t, 1});
+
+            var_r32.SetStepSelection({t, 1});
+            var_r64.SetStepSelection({t, 1});
+
+            bpReader.Get(var_i8, I8.data());
+            bpReader.Get(var_i16, I16.data());
+            bpReader.Get(var_i32, I32.data());
+            bpReader.Get(var_i64, I64.data());
+
+            bpReader.Get(var_u8, U8.data());
+            bpReader.Get(var_u16, U16.data());
+            bpReader.Get(var_u32, U32.data());
+            bpReader.Get(var_u64, U64.data());
+
+            bpReader.Get(var_r32, R32.data());
+            bpReader.Get(var_r64, R64.data());
+
+            bpReader.PerformGets();
+
+            // Re-generate the data this rank wrote (t is always 0 in this loop)
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast<int>(t), mpiRank, mpiSize);
+
+            for (size_t i = 0; i < Nx * Ny; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << t << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(I8[i], currentTestData.I8[i]) << msg;
+                EXPECT_EQ(I16[i], currentTestData.I16[i]) << msg;
+                EXPECT_EQ(I32[i], currentTestData.I32[i]) << msg;
+                EXPECT_EQ(I64[i], currentTestData.I64[i]) << msg;
+                EXPECT_EQ(U8[i], currentTestData.U8[i]) << msg;
+                EXPECT_EQ(U16[i], currentTestData.U16[i]) << msg;
+                EXPECT_EQ(U32[i], currentTestData.U32[i]) << msg;
+                EXPECT_EQ(U64[i], currentTestData.U64[i]) << msg;
+                EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                EXPECT_EQ(R64[i], currentTestData.R64[i]) << msg;
+            }
+        }
+        bpReader.Close();
+    }
+}
+
+TEST_F(BPWriteReadTestFlatten, FlattenBPWriteRead10D2x2)
+{
+    // Each process writes a 1 x 2 x ... x 2 block (512 elements) and all
+    // processes form a 10D (NumberOfProcess x 2 x ... x 2) array
+
+    int mpiRank = 0, mpiSize = 1;
+
+#if ADIOS2_USE_MPI
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+    const std::string fname("FlattenBPWriteRead10D2x2Test_MPI.bp");
+#else
+    const std::string fname("FlattenBPWriteRead10D2x2Test.bp");
+#endif
+
+    size_t NX = static_cast<size_t>(mpiSize);
+    size_t OX = static_cast<size_t>(mpiRank);
+    const adios2::Dims shape{NX, 2, 2, 2, 2, 2, 2, 2, 2, 2};
+    const adios2::Dims start{OX, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    const adios2::Dims count{1, 2, 2, 2, 2, 2, 2, 2, 2, 2};
+
+    std::array<double, 512> R64w, R64r;
+    std::array<std::complex<double>, 512> CR64w, CR64r;
+
+    // Write test data using ADIOS2
+
+#if ADIOS2_USE_MPI
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+#else
+    adios2::ADIOS adios;
+#endif
+    {
+        adios2::IO io = adios.DeclareIO("TestIO");
+
+        // Declare 10D variables
+        {
+            auto var_r64 = io.DefineVariable<double>("r64", shape, start, count);
+            EXPECT_TRUE(var_r64);
+            auto var_c64 = io.DefineVariable<std::complex<double>>("cr64", shape, start, count);
+            EXPECT_TRUE(var_c64);
+        }
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        else
+        {
+            // Create the BP Engine
+            io.SetEngine("BPFile");
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        io.AddTransport("file");
+
+        io.SetParameters("FlattenSteps=on");
+        adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < (size_t)mpiSize; ++step)
+        {
+            // double d = mpiRank + 1 + step / 10.0;
+            double d = mpiRank + 1 / 10.0; // every step is the same
+            // Generate test data for each process uniquely
+            std::for_each(R64w.begin(), R64w.end(), [&](double &v) {
+                v = d;
+                d += 0.0001;
+            });
+            std::for_each(CR64w.begin(), CR64w.end(), [&](std::complex<double> &v) {
+                v.real(d);
+                v.imag(d);
+            });
+
+            // Retrieve the variables that previously went out of scope
+            auto var_r64 = io.InquireVariable<double>("r64");
+            auto var_cr64 = io.InquireVariable<std::complex<double>>("cr64");
+
+            // Make a 10D selection to describe the local dimensions of the
+            // variable we write and its offsets in the global spaces
+            adios2::Box<adios2::Dims> sel({start, count});
+            var_r64.SetSelection(sel);
+            var_cr64.SetSelection(sel);
+
+            // Every rank opens and closes every step but puts its block in
+            // exactly one of them; the reader below expects to find all of
+            // the blocks in a single step
+            bpWriter.BeginStep();
+            // ranks write in reverse order: the last rank puts in step 0
+            if (step == (size_t)(mpiSize - mpiRank - 1))
+            {
+                bpWriter.Put(var_r64, R64w.data());
+                bpWriter.Put(var_cr64, CR64w.data());
+            }
+            bpWriter.EndStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO io = adios.DeclareIO("ReadIO");
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        adios2::Engine bpReader = io.Open(fname, adios2::Mode::ReadRandomAccess);
+
+        EXPECT_EQ(bpReader.Steps(), 1);
+
+        auto var_r64 = io.InquireVariable<double>("r64");
+        EXPECT_TRUE(var_r64);
+        ASSERT_EQ(var_r64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_r64.Steps(), 1);
+        ASSERT_EQ(var_r64.Shape().size(), 10);
+        ASSERT_EQ(var_r64.Shape()[0], NX);
+        ASSERT_EQ(var_r64.Shape()[1], 2);
+        ASSERT_EQ(var_r64.Shape()[2], 2);
+        ASSERT_EQ(var_r64.Shape()[3], 2);
+        ASSERT_EQ(var_r64.Shape()[4], 2);
+        ASSERT_EQ(var_r64.Shape()[5], 2);
+        ASSERT_EQ(var_r64.Shape()[6], 2);
+        ASSERT_EQ(var_r64.Shape()[7], 2);
+        ASSERT_EQ(var_r64.Shape()[8], 2);
+        ASSERT_EQ(var_r64.Shape()[9], 2);
+
+        auto var_cr64 = io.InquireVariable<std::complex<double>>("cr64");
+        EXPECT_TRUE(var_cr64);
+        ASSERT_EQ(var_cr64.ShapeID(), adios2::ShapeID::GlobalArray);
+        ASSERT_EQ(var_cr64.Steps(), 1);
+        ASSERT_EQ(var_cr64.Shape().size(), 10);
+        ASSERT_EQ(var_cr64.Shape()[0], NX);
+        ASSERT_EQ(var_cr64.Shape()[1], 2);
+        ASSERT_EQ(var_cr64.Shape()[2], 2);
+        ASSERT_EQ(var_cr64.Shape()[3], 2);
+        ASSERT_EQ(var_cr64.Shape()[4], 2);
+        ASSERT_EQ(var_cr64.Shape()[5], 2);
+        ASSERT_EQ(var_cr64.Shape()[6], 2);
+        ASSERT_EQ(var_cr64.Shape()[7], 2);
+        ASSERT_EQ(var_cr64.Shape()[8], 2);
+        ASSERT_EQ(var_cr64.Shape()[9], 2);
+
+        const adios2::Box<adios2::Dims> sel(start, count);
+
+        var_r64.SetSelection(sel);
+        var_cr64.SetSelection(sel);
+
+        for (size_t step = 0; step < 1; ++step)
+        {
+            var_r64.SetStepSelection({step, 1});
+            var_cr64.SetStepSelection({step, 1});
+            bpReader.Get(var_r64, R64r.data());
+            bpReader.Get(var_cr64, CR64r.data());
+            bpReader.PerformGets();
+
+            // double d = mpiRank + 1 + step / 10.0;
+            double d = mpiRank + 1 / 10.0;
+            // Re-generate test data for each process uniquely that was written
+            std::for_each(R64w.begin(), R64w.end(), [&](double &v) {
+                v = d;
+                d += 0.0001;
+            });
+            std::for_each(CR64w.begin(), CR64w.end(), [&](std::complex<double> &v) {
+                v.real(d);
+                v.imag(d);
+            });
+
+            for (size_t i = 0; i < 512; ++i)
+            {
+                std::stringstream ss;
+                ss << "t=" << step << " i=" << i << " rank=" << mpiRank;
+                std::string msg = ss.str();
+
+                EXPECT_EQ(R64r[i], R64w[i]) << msg;
+                EXPECT_EQ(CR64r[i], CR64w[i]) << msg;
+            }
+        }
+        bpReader.Close();
+    }
+}
+
+// ADIOS2 BP write and read 1D arrays where rank 0 contributes no data
+TEST_F(BPWriteReadTestFlatten, FlattenBPWriteReadEmptyProcess)
+{
+#if ADIOS2_USE_MPI
+    // Each process, except rank 0, writes an Nx-element (8) slice and all
+    // processes together form a (mpiSize-1) * Nx 1D array
+    const std::string fname("FlattenBPWriteReadEmptyProces.bp");
+
+    int mpiRank = 0, mpiSize = 1;
+    // Number of elements written by each non-empty process
+    const size_t Nx = 8;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpiSize);
+
+    // Number of steps
+    const size_t NSteps = mpiSize;
+
+    /* This is a parallel test, do not run in serial */
+    adios2::ADIOS adios(MPI_COMM_WORLD);
+    {
+        adios2::IO io = adios.DeclareIO("TestIO");
+        // Declare 1D variables ((NumOfProcesses - 1) * Nx)
+        // The local process' part (start, count) can be defined now or later
+        // before Put().
+
+        adios2::Dims shape{static_cast<size_t>(Nx * (mpiSize - 1))};
+        adios2::Dims start{static_cast<size_t>(Nx * (mpiRank - 1))};
+        adios2::Dims count{Nx};
+        if (!mpiRank)
+        {
+            count[0] = 0;
+            start[0] = 0;
+        }
+
+        auto var_r32 = io.DefineVariable<float>("r32", shape, start, count);
+        EXPECT_TRUE(var_r32);
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        else
+        {
+            // Create the BP Engine
+            io.SetEngine("BPFile");
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        io.SetParameters("FlattenSteps=on");
+        adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);
+
+        for (size_t step = 0; step < NSteps; ++step)
+        {
+            // Generate test data for each process uniquely
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast<int>(0), mpiRank, mpiSize);
+
+            bpWriter.BeginStep();
+            if (step == (size_t)mpiRank)
+            {
+                if (mpiRank != 0)
+                {
+                    bpWriter.Put(var_r32, currentTestData.R32.data());
+                }
+            }
+            bpWriter.EndStep();
+        }
+
+        // Close the file
+        bpWriter.Close();
+    }
+
+    {
+        adios2::IO io = adios.DeclareIO("ReadIO");
+
+        if (!engineName.empty())
+        {
+            io.SetEngine(engineName);
+        }
+        if (!engineParameters.empty())
+        {
+            io.SetParameters(engineParameters);
+        }
+
+        adios2::Engine bpReader = io.Open(fname, adios2::Mode::ReadRandomAccess);
+
+        for (size_t step = 0; step < 1; ++step)
+        {
+            // Expected values: reader rank r reads what writer rank r+1 wrote
+            SmallTestData currentTestData =
+                generateNewSmallTestData(m_TestData, static_cast<int>(0), mpiRank + 1, mpiSize);
+
+            auto var_r32 = io.InquireVariable<float>("r32");
+            EXPECT_TRUE(var_r32);
+            ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
+            ASSERT_EQ(var_r32.Shape()[0], (mpiSize - 1) * Nx);
+
+            // Destination buffer for this reader's Nx-element slice
+            std::array<float, Nx> R32;
+
+            // last process does not read
+            // readers 0..N-2, while data was produced by 1..N-1
+            adios2::Dims start{mpiRank * Nx};
+            adios2::Dims count{Nx};
+
+            if (mpiRank == mpiSize - 1)
+            {
+                count[0] = 0;
+                start[0] = 0;
+            }
+
+            const adios2::Box<adios2::Dims> sel(start, count);
+            var_r32.SetSelection(sel);
+
+            if (mpiRank < mpiSize - 1)
+            {
+                bpReader.Get(var_r32, R32.data(), adios2::Mode::Sync);
+                for (size_t i = 0; i < Nx; ++i)
+                {
+                    std::stringstream ss;
+                    ss << "t=" << step << " i=" << i << " rank=" << mpiRank;
+                    std::string msg = ss.str();
+                    EXPECT_EQ(R32[i], currentTestData.R32[i]) << msg;
+                }
+            }
+        }
+        bpReader.Close();
+    }
+#else
+    return;
+#endif
+}
+
+//******************************************************************************
+// main
+//******************************************************************************
+
+int main(int argc, char **argv)
+{
+#if ADIOS2_USE_MPI
+    // MPI_THREAD_MULTIPLE is only required if you enable the SST MPI_DP
+    int threadLevel;
+    MPI_Init_thread(nullptr, nullptr, MPI_THREAD_MULTIPLE, &threadLevel);
+#endif
+
+    // Let gtest process the command line first; the arguments after the
+    // program name are then read as optional engine name and parameters.
+    ::testing::InitGoogleTest(&argc, argv);
+    if (argc > 2)
+    {
+        engineParameters = std::string(argv[2]);
+    }
+    if (argc > 1)
+    {
+        engineName = std::string(argv[1]);
+    }
+
+    const int testStatus = RUN_ALL_TESTS();
+
+#if ADIOS2_USE_MPI
+    MPI_Finalize();
+#endif
+
+    return testStatus;
+}
diff --git a/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt b/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
index 24bd5cd4c4..13f56300f5 100644
--- a/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
+++ b/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
@@ -11,6 +11,7 @@ The time dimension is the first dimension then.
   --attrsonly | -A           List attributes only
   --meshes    | -m           List meshes
   --timestep  | -t           Read content step by step (stream reading)
+  --ignore_flatten           Display steps as written (don't flatten, even if writer said to)
   --dump      | -d           Dump matched variables/attributes
                                To match attributes too, add option -a
   --regexp    | -e           Treat masks as extended regular expressions
diff --git a/thirdparty/EVPath/EVPath/cmib.c b/thirdparty/EVPath/EVPath/cmib.c
index 6cfd7c828f..1a956cedb1 100644
--- a/thirdparty/EVPath/EVPath/cmib.c
+++ b/thirdparty/EVPath/EVPath/cmib.c
@@ -369,9 +369,7 @@ static inline uint16_t get_local_lid(struct ibv_context *context, int port)
 }
 
 static int
-check_host(hostname, sin_addr)
-	char *hostname;
-void *sin_addr;
+check_host(char *hostname,void *sin_addr)
 {
 	struct hostent *host_addr;
 	host_addr = gethostbyname(hostname);
@@ -393,8 +391,7 @@ void *sin_addr;
 }
 
 static ib_conn_data_ptr 
-create_ib_conn_data(svc)
-	CMtrans_services svc;
+create_ib_conn_data(CMtrans_services svc)
 {
 	ib_conn_data_ptr ib_conn_data = svc->malloc_func(sizeof(struct ib_connection_data));
 	memset(ib_conn_data, 0, sizeof(struct ib_connection_data));
@@ -946,9 +943,7 @@ CMIB_data_available(transport_entry trans, CMConnection conn)
  * Accept socket connection
  */
 static void
-ib_accept_conn(void_trans, void_conn_sock)
-	void *void_trans;
-void *void_conn_sock;
+ib_accept_conn(void *void_trans, void *void_conn_sock)
 {
 	transport_entry trans = (transport_entry) void_trans;
 	int conn_sock = (int) (long) void_conn_sock;
@@ -1106,9 +1101,7 @@ void *void_conn_sock;
 }
 
 extern void
-libcmib_LTX_shutdown_conn(svc, scd)
-	CMtrans_services svc;
-ib_conn_data_ptr scd;
+libcmib_LTX_shutdown_conn(CMtrans_services svc, ib_conn_data_ptr scd)
 {
 	svc->trace_out(scd->sd->cm, "CMIB shutdown_conn, removing select %d\n",
 	               scd->fd);
@@ -1141,14 +1134,7 @@ is_private_10(int IP)
 }
 
 static int
-initiate_conn(cm, svc, trans, attrs, ib_conn_data, conn_attr_list, no_more_redirect)
-	CManager cm;
-CMtrans_services svc;
-transport_entry trans;
-attr_list attrs;
-ib_conn_data_ptr ib_conn_data;
-attr_list conn_attr_list;
-int no_more_redirect;
+initiate_conn(CManager cm, CMtrans_services svc, transport_entry trans, attr_list attrs, ib_conn_data_ptr ib_conn_data, attr_list conn_attr_list, int no_more_redirect)
 {
 	int sock;
 
@@ -1413,11 +1399,7 @@ int no_more_redirect;
  * (name_str stores the machine name).
  */
 extern CMConnection
-libcmib_LTX_initiate_conn(cm, svc, trans, attrs)
-	CManager cm;
-CMtrans_services svc;
-transport_entry trans;
-attr_list attrs;
+libcmib_LTX_initiate_conn(CManager cm, CMtrans_services svc, transport_entry trans, attr_list attrs)
 {
 	ib_conn_data_ptr ib_conn_data = create_ib_conn_data(svc);
 	attr_list conn_attr_list = create_attr_list();
@@ -1447,11 +1429,7 @@ attr_list attrs;
  * same as ours and if the IP_PORT matches the one we are listening on.
  */
 extern int
-libcmib_LTX_self_check(cm, svc, trans, attrs)
-	CManager cm;
-CMtrans_services svc;
-transport_entry trans;
-attr_list attrs;
+libcmib_LTX_self_check(CManager cm, CMtrans_services svc, transport_entry trans, attr_list attrs)
 {
 
 	ib_client_data_ptr sd = trans->trans_data;
@@ -1499,12 +1477,9 @@ attr_list attrs;
 }
 
 extern int
-libcmib_LTX_connection_eq(cm, svc, trans, attrs, scd)
-	CManager cm;
-CMtrans_services svc;
-transport_entry trans;
-attr_list attrs;
-ib_conn_data_ptr scd;
+libcmib_LTX_connection_eq(CManager cm, CMtrans_services svc, 
+			  transport_entry trans, attr_list attrs,
+			  ib_conn_data_ptr scd)
 {
 
 	int int_port_num;
@@ -1548,11 +1523,8 @@ ib_conn_data_ptr scd;
  * Create an IP socket for connection from other CMs
  */
 extern attr_list
-libcmib_LTX_non_blocking_listen(cm, svc, trans, listen_info)
-	CManager cm;
-CMtrans_services svc;
-transport_entry trans;
-attr_list listen_info;
+libcmib_LTX_non_blocking_listen(CManager cm, CMtrans_services svc,
+				transport_entry trans, attr_list listen_info)
 {
 	ib_client_data_ptr sd = trans->trans_data;
 	unsigned int length;
@@ -1714,11 +1686,8 @@ struct iovec {
 #endif
 
 extern void
-libcmib_LTX_set_write_notify(trans, svc, scd, enable)
-	transport_entry trans;
-CMtrans_services svc;
-ib_conn_data_ptr scd;
-int enable;
+libcmib_LTX_set_write_notify(transport_entry trans, CMtrans_services svc,
+			     ib_conn_data_ptr scd, int enable)
 {
 	if (enable != 0) {
 		svc->fd_write_select(trans->cm, scd->fd, (select_list_func) trans->write_possible,
@@ -1910,12 +1879,8 @@ libcmib_LTX_writev_complete_notify_func(CMtrans_services svc,
 }
 
 extern int
-libcmib_LTX_writev_func(svc, scd, iovs, iovcnt, attrs)
-CMtrans_services svc;
-ib_conn_data_ptr scd;
-void *iovs;
-int iovcnt;
-attr_list attrs;
+libcmib_LTX_writev_func(CMtrans_services svc, ib_conn_data_ptr scd,
+			void *iovs, int iovcnt, attr_list attrs)
 {
     return libcmib_LTX_writev_complete_notify_func(svc, scd, iovs, iovcnt, 
 						   attrs, NULL, NULL);
@@ -1934,9 +1899,7 @@ free_ib_data(CManager cm, void *sdv)
 }
 
 extern void *
-libcmib_LTX_initialize(cm, svc)
-	CManager cm;
-CMtrans_services svc;
+libcmib_LTX_initialize(CManager cm, CMtrans_services svc)
 {
 	static int atom_init = 0;
 
diff --git a/thirdparty/EVPath/EVPath/gen_interface.pl b/thirdparty/EVPath/EVPath/gen_interface.pl
index f76852ba75..d79335ea5d 100755
--- a/thirdparty/EVPath/EVPath/gen_interface.pl
+++ b/thirdparty/EVPath/EVPath/gen_interface.pl
@@ -10,26 +10,26 @@ sub gen_type
     print REVP "    int condition_var;\n";
     @args = split( ", ",  $arg_str,2);
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $iotype = $argtype;
-	    $sizetype = $argtype;
-	  switch:for ($argtype) {
-	      /attr_list/ && do {$iotype = "string"; $argtype="char*"; last;};
-	      /char*/ && do {$iotype = "string"; $argtype="char*"; last;};
-	      /EVstone$/ && do {$iotype = "integer"; $argtype="EVstone"; last;};
-	      /EVstone\*/ && do {print REVP "    int ${argname}_len;\n";
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $iotype = $argtype;
+            $sizetype = $argtype;
+          switch:for ($argtype) {
+              /attr_list/ && do {$iotype = "string"; $argtype="char*"; last;};
+              /char*/ && do {$iotype = "string"; $argtype="char*"; last;};
+              /EVstone$/ && do {$iotype = "integer"; $argtype="EVstone"; last;};
+              /EVstone\*/ && do {print REVP "    int ${argname}_len;\n";
 $iotype = "integer[${argname}_len]"; $argtype="int *"; last;};
-	      /EVSimpleHandlerFunc$/ && do {$iotype = "string"; $argtype="char*"; last;};
-	      /FMStructDescList/ && do {$iotype = "string"; $argtype="char*"; last;};
-	  }
-	}
-	print REVP "    $argtype $argname;\n";
+              /EVSimpleHandlerFunc$/ && do {$iotype = "string"; $argtype="char*"; last;};
+              /FMStructDescList/ && do {$iotype = "string"; $argtype="char*"; last;};
+          }
+        }
+        print REVP "    $argtype $argname;\n";
     }
     print REVP "} ${subr}_request;\n";
     $ret_type = $return_type{$subr};
@@ -52,27 +52,27 @@ sub gen_field_list
     print REVP "    {\"condition_var\", \"integer\", sizeof(int), FMOffset(${subr}_request*, condition_var)},\n";
     @args = split( ", ",  $arg_str,2);
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $iotype = $argtype;
-	    $sizetype = $argtype;
-	  switch:for ($argtype) {
-	      /attr_list/ && do {$iotype = "string"; $argtype="char*"; last;};
-	      /char*/ && do {$iotype = "string"; $argtype="char*"; last;};
-	      /void*/ && do {$iotype = "char[${argname}_len"; $argtype="void*"; last;};
-	      /int/ && do {$iotype = "integer"; $argtype="int"; last;};
-	      /EVstone/ && do {$iotype = "integer"; $argtype="EVstone"; last;};
-	      /EVaction/ && do {$iotype = "integer"; $argtype="EVaction"; last;};
-	      /EVSimpleHandlerFunc/ && do {$iotype = "string"; $argtype="EVSimpleHandlerFunc"; last;};
-	      /FMStructDescList/ && do {$iotype = "string"; $argtype="EVSimpleHandlerFunc"; last;};
-	  }
-	}
-	print REVP "    {\"$argname\", \"$iotype\", sizeof($sizetype), FMOffset(${subr}_request*,$argname)},\n";
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $iotype = $argtype;
+            $sizetype = $argtype;
+          switch:for ($argtype) {
+              /attr_list/ && do {$iotype = "string"; $argtype="char*"; last;};
+              /char*/ && do {$iotype = "string"; $argtype="char*"; last;};
+              /void*/ && do {$iotype = "char[${argname}_len"; $argtype="void*"; last;};
+              /int/ && do {$iotype = "integer"; $argtype="int"; last;};
+              /EVstone/ && do {$iotype = "integer"; $argtype="EVstone"; last;};
+              /EVaction/ && do {$iotype = "integer"; $argtype="EVaction"; last;};
+              /EVSimpleHandlerFunc/ && do {$iotype = "string"; $argtype="EVSimpleHandlerFunc"; last;};
+              /FMStructDescList/ && do {$iotype = "string"; $argtype="EVSimpleHandlerFunc"; last;};
+          }
+        }
+        print REVP "    {\"$argname\", \"$iotype\", sizeof($sizetype), FMOffset(${subr}_request*,$argname)},\n";
     }
     print REVP "    {NULL, NULL, 0, 0}\n};\n";
     print REVP "\nFMStructDescRec  ${subr}_req_formats[] = {\n";
@@ -87,17 +87,17 @@ sub gen_stub {
     print REVP "\nextern $return_type{$subr}\n";
     print REVPHI "\nextern $return_type{$subr}\n";
     if ($#args > 0) {
-	print REVP "INT_R$subr(CMConnection conn, $args[1])\n";
-	print REVPHI "INT_R$subr(CMConnection conn, $args[1]);\n";
+        print REVP "INT_R$subr(CMConnection conn, $args[1])\n";
+        print REVPHI "INT_R$subr(CMConnection conn, $args[1]);\n";
     } else {
-	print REVP "INT_R$subr(CMConnection conn)\n";
-	print REVPHI "INT_R$subr(CMConnection conn);\n";
+        print REVP "INT_R$subr(CMConnection conn)\n";
+        print REVPHI "INT_R$subr(CMConnection conn);\n";
     }
     print REVP "{\n";
     
     $_ = $return_type{$subr};
     if (/^\s*void\s*$/) {
-	$return_type{$subr} = "void";
+        $return_type{$subr} = "void";
     }
     $retsubtype = $return_type{$subr};
   switch:  for ($ret_type) {
@@ -115,33 +115,33 @@ sub gen_stub {
     print REVP "    f = INT_CMlookup_format(conn->cm, ${subr}_req_formats);\n";
     $free_list = "";
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $argright = $argname;
-	  switch:for ($argtype) {
-	      /attr_list/ && do {$argright = "attr_list_to_string($argname)"; $free_list .= "    free(request.$argname);\n"; last;};
-	      /FMStructDescList/ && do {$argright = "get_format_name(conn->cm, $argname)"; last;};
-	  }
-	}
-	print REVP "    request.$argname = $argright;\n";
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $argright = $argname;
+          switch:for ($argtype) {
+              /attr_list/ && do {$argright = "attr_list_to_string($argname)"; $free_list .= "    free(request.$argname);\n"; last;};
+              /FMStructDescList/ && do {$argright = "get_format_name(conn->cm, $argname)"; last;};
+          }
+        }
+        print REVP "    request.$argname = $argright;\n";
     }
     print REVP "    request.condition_var = cond;\n";
     print REVP "    if (f == NULL) {\n";
     print REVP "        f = INT_CMregister_format(conn->cm, ${subr}_req_formats);\n";
     print REVP "    }\n";
     if ($return_type{$subr} eq "void") {
-	print REVP "    INT_CMCondition_set_client_data(conn->cm, cond, NULL);\n";
+        print REVP "    INT_CMCondition_set_client_data(conn->cm, cond, NULL);\n";
     } else {
-	print REVP "    INT_CMCondition_set_client_data(conn->cm, cond, &response);\n";
+        print REVP "    INT_CMCondition_set_client_data(conn->cm, cond, &response);\n";
     }
     print REVP "    INT_CMwrite(conn, f, &request);\n";
     if ("$free_list" ne "") {
-	print REVP "$free_list";
+        print REVP "$free_list";
     }
     print REVP "    INT_CMCondition_wait(conn->cm, cond);\n";
   switch:for ($return_type{$subr}) {
@@ -173,7 +173,7 @@ sub gen_wrapper {
     print REVP "{\n";
     $_ = $return_type{$subr};
     if (/^\s*void\s*$/) {
-	$return_type{$subr} = "void";
+        $return_type{$subr} = "void";
     }
     $retsubtype = $return_type{$subr};
     switch:  for ($ret_type) {
@@ -190,21 +190,21 @@ sub gen_wrapper {
         print REVP "    ret = INT_R${subr}(conn";
     }
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $argright = "$argname";
-	  switch:for ($argtype) {
-	      /attr_list/ && do {$argright = "$argname"; last;};
-	      /EVSimpleHandlerFunc/ && do {$argright = "$argname"; last;};
-	      /FMStructDescList/ && do {$argright = "$argname"; last;};
-	  }
-	}
-	print REVP ", $argright";
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $argright = "$argname";
+          switch:for ($argtype) {
+              /attr_list/ && do {$argright = "$argname"; last;};
+              /EVSimpleHandlerFunc/ && do {$argright = "$argname"; last;};
+              /FMStructDescList/ && do {$argright = "$argname"; last;};
+          }
+        }
+        print REVP ", $argright";
     }
     print REVP ");\n";
     print REVP "    CManager_unlock(conn->cm);\n";
@@ -229,7 +229,7 @@ sub gen_handler {
     print REVP "{\n";
     $_ = $return_type{$subr};
     if (/^\s*void\s*$/) {
-	$return_type{$subr} = "void";
+        $return_type{$subr} = "void";
     }
     $retsubtype = $return_type{$subr};
   switch:  for ($ret_type) {
@@ -248,43 +248,43 @@ sub gen_handler {
     print REVP "        f = INT_CMregister_format(conn->cm, EV_${retsubtype}_response_formats);\n";
     print REVP "    }\n";
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $argright = $argname;
-	  switch:for ($argtype) {
-	      /attr_list/ && do {print REVP "    attr_list $argname = attr_list_from_string(request->$argname);\n"; last;};
-	      /EVSimpleHandlerFunc/ && do {print REVP "    EVSimpleHandlerFunc $argname = REVPlookup_handler(request->$argname);\n"; last;};
-	      /FMStructDescList/ && do {print REVP "    FMStructDescList $argname = REVPlookup_format_structs(conn->cm, request->$argname);\n"; last;};
-	  }
-	}
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $argright = $argname;
+          switch:for ($argtype) {
+              /attr_list/ && do {print REVP "    attr_list $argname = attr_list_from_string(request->$argname);\n"; last;};
+              /EVSimpleHandlerFunc/ && do {print REVP "    EVSimpleHandlerFunc $argname = REVPlookup_handler(request->$argname);\n"; last;};
+              /FMStructDescList/ && do {print REVP "    FMStructDescList $argname = REVPlookup_format_structs(conn->cm, request->$argname);\n"; last;};
+          }
+        }
     }
     if ($return_type{$subr} eq "void") {
-	print REVP "    $subr(cm";
+        print REVP "    $subr(cm";
     } else {
-	print REVP "    ret = $subr(cm";
+        print REVP "    ret = $subr(cm";
     }
     $after = "";
     foreach $arg (split (", ", $args[1])) {
-	$_ = $arg;
-	if (/^\s*(.*\W+)(\w+)$\s*/) {
-	    $argtype = $1;
-	    $argname = $2;
-	    $argtype =~ s/\s+$//;
-	    $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	    $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	    $argright = "request->$argname";
-	  switch:for ($argtype) {
-	      /attr_list/ && do {$argright = "$argname"; $after .= "free_attr_list($argname);\n"; last;};
-	      /EVSimpleHandlerFunc/ && do {$argright = "$argname"; last;};
-	      /FMStructDescList/ && do {$argright = "$argname"; last;};
-	  }
-	}
-	print REVP ", $argright";
+        $_ = $arg;
+        if (/^\s*(.*\W+)(\w+)$\s*/) {
+            $argtype = $1;
+            $argname = $2;
+            $argtype =~ s/\s+$//;
+            $argtype =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+            $argtype =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+            $argright = "request->$argname";
+          switch:for ($argtype) {
+              /attr_list/ && do {$argright = "$argname"; $after .= "free_attr_list($argname);\n"; last;};
+              /EVSimpleHandlerFunc/ && do {$argright = "$argname"; last;};
+              /FMStructDescList/ && do {$argright = "$argname"; last;};
+          }
+        }
+        print REVP ", $argright";
     }
     if ($has_client_data == 1) {print REVP ", NULL";}
     print REVP ");\n";
@@ -311,7 +311,7 @@ sub strip_client_data {
     @args = split( ", ",  $arguments{$subr});
     $_ = pop(@args);
     if (!/.*client_data\W*$/) {
-	push(@args, $_);
+        push(@args, $_);
     }
     $arg_str = join(", ", @args);
 }
@@ -321,10 +321,10 @@ sub mod_EVhandler {
     local(@args);
     @args = split( ", ",  $arg_str);
     for( my $i=0; $i < scalar(@args); $i++) {
-	$_ = $args[$i];
-	if (/\W*EVSimpleHandlerFunc.*$/) {
-	    $args[$i] = "char *handler";
-	}
+        $_ = $args[$i];
+        if (/\W*EVSimpleHandlerFunc.*$/) {
+            $args[$i] = "char *handler";
+        }
     }
     $arg_str = join(", ", @args);
     return $arg_str;
@@ -332,23 +332,39 @@ sub mod_EVhandler {
 
 {
     local ($/, *INPUT);
-	
+        
+    $cat = "cat";
+    if ($^O eq "MSWin32") {
+      $cat = "powershell.exe Get-Content";
+    }
     $cat_args = "";
     $has_ev_dfg = 0;
     $cm_only = 0;
+    $index = 0;
     foreach my $a(@ARGV) {
-	if ($a =~ "-CM_ONLY") {
-	    $cm_only = 1;
-	    next;
-	}
-	$a=~s/ /\\ /g;
-	$cat_args .= "$a ";
-	if ($a =~ /ev_dfg/) {
-	    $has_evdfg = 1;
-	}
+      if ($a =~ "-CM_ONLY") {
+          $cm_only = 1;
+          next;
+      }
+      $a=~s/ /\\ /g;
+
+      if ($^O eq "MSWin32") {
+        $sep = ",";
+      } else {
+        $sep = " ";
+      }
+      if ($index == 0)
+      {
+        $sep = "";
+      }
+      $cat_args .= "$sep$a";
+      if ($a =~ /ev_dfg/) {
+          $has_evdfg = 1;
+      }
+      $index++;
     }
-    unless (open(INPUT, "cat $cat_args |")) {
-	die "sudden flaming death, no file: $cat_args\n";
+    unless (open(INPUT, "$cat $cat_args |")) {
+            die "sudden flaming death, no file: $cat_args\n";
     }
 
     $_ = <INPUT>;
@@ -361,76 +377,76 @@ sub mod_EVhandler {
 LINE:
 for (@f) {
     if (/NOLOCK/) {
-	$nolock = 1;
+        $nolock = 1;
     }
     if (/REMOTE/) {
-	$remote = 1;
+        $remote = 1;
     }
     if (/^extern/) {
-	next LINE if (/\"C\"/);
-	$decl = "";
-	if ($nolock == 1) {$decl = "NOLOCK";}
-	if ($remote == 1) {$decl = "REMOTE";}
-	$nolock = 0;
-	$remote = 0;
-	$pending = 1;
+        next LINE if (/\"C\"/);
+        $decl = "";
+        if ($nolock == 1) {$decl = "NOLOCK";}
+        if ($remote == 1) {$decl = "REMOTE";}
+        $nolock = 0;
+        $remote = 0;
+        $pending = 1;
     }
     if (($pending) && /;/) {
-	$decl = $decl . " " . $_;
-	push (@DECLS, $decl);
-	$pending = 0;
+        $decl = $decl . " " . $_;
+        push (@DECLS, $decl);
+        $pending = 0;
     }
     if ($pending) {
-	$decl = $decl . " " . $_;
+        $decl = $decl . " " . $_;
     }
 }
 for (@DECLS) {
     $nolock = 0;
     $remote = 0;
     if (/NOLOCK/) {
-	s/NOLOCK//g;
-	$nolock = 1;
+        s/NOLOCK//g;
+        $nolock = 1;
     }
     if (/REMOTE/) {
-	s/REMOTE//g;
-	$remote = 1;
+        s/REMOTE//g;
+        $remote = 1;
     }
     if (/extern\W+(\w+\W+)(\w+)\W*\((.*)\)/) {
-	$return = $1;
-	$name = $2;
-	$_ = $3;
-	s/\)//g;
-	s/\s+/ /g;
-	$return =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	$return =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	$return =~ s/\s*$//;  #remove unnecessary white space
-	$return =~ s/^\s*//;  #remove unnecessary white space
-	$return_type{$name} = $return;
-	$args = $_;
-	$arguments{$name} = "$args";
+        $return = $1;
+        $name = $2;
+        $_ = $3;
+        s/\)//g;
+        s/\s+/ /g;
+        $return =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+        $return =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+        $return =~ s/\s*$//;  #remove unnecessary white space
+        $return =~ s/^\s*//;  #remove unnecessary white space
+        $return_type{$name} = $return;
+        $args = $_;
+        $arguments{$name} = "$args";
     } else {
       if (/extern\W+(\w+\W+\w+\W+)(\w+).*\((.*)\)/) {
-	$return = $1;
-	$name = $2;
-	$_ = $3;
-	s/\)//g;
-	s/\s+/ /g;
-	$return =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
-	$return =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
-	$return =~ s/\s*$//;  #remove unnecessary white space
-	$return =~ s/^\s*//;  #remove unnecessary white space
-	$return_type{$name} = $return;
-	$args = $_;
-	$arguments{$name} = "$args";
+        $return = $1;
+        $name = $2;
+        $_ = $3;
+        s/\)//g;
+        s/\s+/ /g;
+        $return =~ s/(?!\w)\s+(?=\W)//;  #remove unnecessary white space
+        $return =~ s/(?!\W)\s+(?=\w)//;  #remove unnecessary white space
+        $return =~ s/\s*$//;  #remove unnecessary white space
+        $return =~ s/^\s*//;  #remove unnecessary white space
+        $return_type{$name} = $return;
+        $args = $_;
+        $arguments{$name} = "$args";
       } else {
-	print "Failed to match function2 on $_\n"
+        print "Failed to match function2 on $_\n"
       }
     }
     if ($nolock == 1) {
-	$nolocking{$name} = 1;
+        $nolocking{$name} = 1;
     }
     if ($remote == 1) {
-	$remote_enabled{$name} = 1;
+        $remote_enabled{$name} = 1;
     }
 }
 
@@ -459,121 +475,121 @@ sub mod_EVhandler {
     print INT "#include \"ev_dfg_internal.h\"\n";
 }
 print INT<cm";
-	    }
-	    if (/EVmaster\W/) {
-		$cmanager = $name. "->cm";
-	    }
-	    if (/EVdfg_stone\W/) {
-		$evdfg_stone = $name;
-	    }
-	}
-
-	$_ = $return_type{$subr};
-	if (/^\s*void\s*$/) {
-	    $return_type{$subr} = "void";
-	}
-	if ($return_type{$subr} ne "void") {
-	    print INT "\t$return_type{$subr} ret;\n";
-	}
-	if (!defined($nolocking{$subr})) {
-	    if (defined($cmanager)) {
-		print INT "\tCManager_lock($cmanager);\n";
-	    } else {
-		if (defined($cmconnection)) {
-		    print INT "\tCManager cm = $cmconnection->cm;\n";
-		} elsif (defined($evsource)) {
-		    print INT "\tCManager cm = $evsource->cm;\n";
-		} elsif (defined($cmtaskhandle)) {
-		    print INT "\tCManager cm = $cmtaskhandle->cm;\n";
-		} elsif (defined($cmformat)) {
-		    print INT "\tCManager cm = $cmformat->cm;\n";
-		} elsif (defined($evdfg)) {
-		    print INT "\tCManager cm = $evdfg->master->cm;\n";
-		} elsif (defined($evdfg_stone)) {
-		    print INT "\tCManager cm = $evdfg_stone->dfg->master->cm;\n";
-		} else {
-#		    print INT "\tCManager cm = duh;\n";
-		}
-		print INT "\tCManager_lock(cm);\n";
-	    }
-	}
-	if ($return_type{$subr} eq "void") {
-	    print INT "\t";
-	} else {
-	    print INT "\tret = ";
-	}
-
-	print INT "INT_$subr(";
-	$first = 1;
-	foreach $arg (split ( ",", $arguments{$subr})) {
-	    if ($first != 1) {
-		print INT ", ";
-	    } else {
-		$first = 0;
-	    }
-	    $_ = $arg;
-	    if (/\W+(\w+)\W*$/) {
-		print INT "$1";
-	    }
-	}
-	print INT ");\n";
-	if ((!defined($nolocking{$subr})) && ($subr ne "CManager_close")) {
-	    if (defined($cmanager)) {
-		print INT "\tCManager_unlock($cmanager);\n";
-	    } else {
-		print INT "\tCManager_unlock(cm);\n";
-	    }
-	}
-	print INT "\treturn ret;\n" unless ($return_type{$subr} eq "void");
-	print INT "}\n";
+        if ($cm_only && (($subr =~ /^EV/) || ($subr =~ /^create/))) {
+            next;
+        }
+        print INT "\nextern $return_type{$subr}\n";
+        print INT "$subr ( $arguments{$subr} )\n";
+        print INT "{\n";
+        undef $cmanager;
+        undef $cmconnection;
+        undef $evsource;
+        undef $cmtaskhandle;
+        undef $cmformat;
+        undef $evdfg;
+        undef $evdfg_stone;
+        foreach $arg (split ( ",", $arguments{$subr})) {
+            $_ = $arg;
+            if (/\W+(\w+)\W*$/) {
+                $name = $1;
+            }
+            if (/CManager/) {
+                $cmanager = $name;
+            }
+            if (/CMConnection/) {
+                $cmconnection = $name;
+            }
+            if (/EVsource/) {
+                $evsource = $name;
+            }
+            if (/CMTaskHandle/) {
+                $cmtaskhandle = $name;
+            }
+            if (/CMFormat\W/) {
+                $cmformat = $name;
+            }
+            if (/EVdfg\W/) {
+                $evdfg = $name;
+            }
+            if (/EVclient\W/) {
+                $cmanager = $name. "->cm";
+            }
+            if (/EVmaster\W/) {
+                $cmanager = $name. "->cm";
+            }
+            if (/EVdfg_stone\W/) {
+                $evdfg_stone = $name;
+            }
+        }
+
+        $_ = $return_type{$subr};
+        if (/^\s*void\s*$/) {
+            $return_type{$subr} = "void";
+        }
+        if ($return_type{$subr} ne "void") {
+            print INT "\t$return_type{$subr} ret;\n";
+        }
+        if (!defined($nolocking{$subr})) {
+            if (defined($cmanager)) {
+                print INT "\tCManager_lock($cmanager);\n";
+            } else {
+                if (defined($cmconnection)) {
+                    print INT "\tCManager cm = $cmconnection->cm;\n";
+                } elsif (defined($evsource)) {
+                    print INT "\tCManager cm = $evsource->cm;\n";
+                } elsif (defined($cmtaskhandle)) {
+                    print INT "\tCManager cm = $cmtaskhandle->cm;\n";
+                } elsif (defined($cmformat)) {
+                    print INT "\tCManager cm = $cmformat->cm;\n";
+                } elsif (defined($evdfg)) {
+                    print INT "\tCManager cm = $evdfg->master->cm;\n";
+                } elsif (defined($evdfg_stone)) {
+                    print INT "\tCManager cm = $evdfg_stone->dfg->master->cm;\n";
+                } else {
+#                   print INT "\tCManager cm = duh;\n";
+                }
+                print INT "\tCManager_lock(cm);\n";
+            }
+        }
+        if ($return_type{$subr} eq "void") {
+            print INT "\t";
+        } else {
+            print INT "\tret = ";
+        }
+
+        print INT "INT_$subr(";
+        $first = 1;
+        foreach $arg (split ( ",", $arguments{$subr})) {
+            if ($first != 1) {
+                print INT ", ";
+            } else {
+                $first = 0;
+            }
+            $_ = $arg;
+            if (/\W+(\w+)\W*$/) {
+                print INT "$1";
+            }
+        }
+        print INT ");\n";
+        if ((!defined($nolocking{$subr})) && ($subr ne "CManager_close")) {
+            if (defined($cmanager)) {
+                print INT "\tCManager_unlock($cmanager);\n";
+            } else {
+                print INT "\tCManager_unlock(cm);\n";
+            }
+        }
+        print INT "\treturn ret;\n" unless ($return_type{$subr} eq "void");
+        print INT "}\n";
     }
 print "done\n";
 
 print INT<evp->fmc, (char*)id);
     free(id);
@@ -825,27 +841,27 @@ sub mod_EVhandler {
 
 EOF
     foreach $subr (sort (keys %return_type)) {
-	defined($remote_enabled{$subr}) || next;
-
-	print REVPH "\nextern $return_type{$subr}\n";
-	$no_client_data = strip_client_data($arguments{$subr});
-	$no_handler = mod_EVhandler($no_client_data);
-	$_ = $arguments{$subr};
-	$has_client_data = 0;
-	if (/.*client_data\W*$/) {
-	    $has_client_data = 1;
-	}
-	@args = split( ", ",  $no_handler, 2);
-	if ($#args > 0) {
-	    print REVPH "R$subr(CMConnection conn, $args[1]);\n";
-	} else {
-	    print REVPH "R$subr(CMConnection conn);\n";
-	}
-	gen_type(${subr}, $no_handler);
-	gen_field_list(${subr}, $no_handler);
-	gen_stub(${subr}, $no_handler);
-	gen_wrapper(${subr},  $no_handler, $has_client_data);
-	gen_handler(${subr}, $no_client_data, $has_client_data);
+        defined($remote_enabled{$subr}) || next;
+
+        print REVPH "\nextern $return_type{$subr}\n";
+        $no_client_data = strip_client_data($arguments{$subr});
+        $no_handler = mod_EVhandler($no_client_data);
+        $_ = $arguments{$subr};
+        $has_client_data = 0;
+        if (/.*client_data\W*$/) {
+            $has_client_data = 1;
+        }
+        @args = split( ", ",  $no_handler, 2);
+        if ($#args > 0) {
+            print REVPH "R$subr(CMConnection conn, $args[1]);\n";
+        } else {
+            print REVPH "R$subr(CMConnection conn);\n";
+        }
+        gen_type(${subr}, $no_handler);
+        gen_field_list(${subr}, $no_handler);
+        gen_stub(${subr}, $no_handler);
+        gen_wrapper(${subr},  $no_handler, $has_client_data);
+        gen_handler(${subr}, $no_client_data, $has_client_data);
     }
 
 print REVP<condition_var);
     if (NULL != response_ptr) {
-	*response_ptr = data;
+        *response_ptr = data;
     }
     CMCondition_signal(cm, response->condition_var);
 }
@@ -866,7 +882,7 @@ sub mod_EVhandler {
     EV_void_response *response = (EV_void_response*) data;
     void **response_ptr = CMCondition_get_client_data(cm, response->condition_var);
     if (NULL != response_ptr) {
-	memcpy(response_ptr, data, sizeof(EV_int_response));
+        memcpy(response_ptr, data, sizeof(EV_int_response));
     }
     CMCondition_signal(cm, response->condition_var);
 }
@@ -877,8 +893,8 @@ sub mod_EVhandler {
     EV_string_response *response = (EV_string_response*) data;
     EV_string_response *stub_ptr = CMCondition_get_client_data(cm, response->condition_var);
     if (NULL != stub_ptr) {
-	memcpy(stub_ptr, data, sizeof(EV_string_response));
-	stub_ptr->ret = strdup(response->ret);
+        memcpy(stub_ptr, data, sizeof(EV_string_response));
+        stub_ptr->ret = strdup(response->ret);
     }
     CMCondition_signal(cm, response->condition_var);
 }
@@ -889,8 +905,8 @@ sub mod_EVhandler {
     EV_EVevent_list_response *response = (EV_EVevent_list_response*) data;
     EV_EVevent_list_response *stub_ptr = CMCondition_get_client_data(cm, response->condition_var);
     if (NULL != stub_ptr) {
-	memcpy(stub_ptr, data, sizeof(EV_EVevent_list_response));
-	stub_ptr->ret = copy_EVevent_list(response->ret);
+        memcpy(stub_ptr, data, sizeof(EV_EVevent_list_response));
+        stub_ptr->ret = copy_EVevent_list(response->ret);
     }
     CMCondition_signal(cm, response->condition_var);
 }
@@ -915,7 +931,7 @@ sub mod_EVhandler {
 EOF
 print REVPH<
 #include 
 #include 
 #endif
diff --git a/thirdparty/EVPath/EVPath/mtests/cmtest.c b/thirdparty/EVPath/EVPath/mtests/cmtest.c
index 0396443fee..3b2ef85810 100644
--- a/thirdparty/EVPath/EVPath/mtests/cmtest.c
+++ b/thirdparty/EVPath/EVPath/mtests/cmtest.c
@@ -18,6 +18,7 @@
 #define srand48(x)
 #define kill(x,y) TerminateProcess(OpenProcess(0, 0, (DWORD)x),y)
 #else
+#include 
 #include 
 #include 
 #endif
diff --git a/thirdparty/EVPath/EVPath/rtests/evtest.c b/thirdparty/EVPath/EVPath/rtests/evtest.c
index cf1a2ac732..615fc7f903 100644
--- a/thirdparty/EVPath/EVPath/rtests/evtest.c
+++ b/thirdparty/EVPath/EVPath/rtests/evtest.c
@@ -20,6 +20,7 @@
 #define srand48(x)
 #define kill(x,y) TerminateProcess(OpenProcess(0,0,(DWORD)x),y)
 #else
+#include 
 #include 
 #include 
 #endif
diff --git a/thirdparty/EVPath/EVPath/rtests/extract_test.c b/thirdparty/EVPath/EVPath/rtests/extract_test.c
index ed2677329b..47fe155248 100644
--- a/thirdparty/EVPath/EVPath/rtests/extract_test.c
+++ b/thirdparty/EVPath/EVPath/rtests/extract_test.c
@@ -19,6 +19,7 @@
 #define srand48(x)
 #define kill(x,y) TerminateProcess(OpenProcess(0, 0, (DWORD)x),y)
 #else
+#include 
 #include 
 #include 
 #endif
diff --git a/thirdparty/EVPath/EVPath/rtests/remote_terminal_test.c b/thirdparty/EVPath/EVPath/rtests/remote_terminal_test.c
index 999bb626b4..8896145d1c 100644
--- a/thirdparty/EVPath/EVPath/rtests/remote_terminal_test.c
+++ b/thirdparty/EVPath/EVPath/rtests/remote_terminal_test.c
@@ -19,6 +19,7 @@
 #define srand48(x)
 #define kill(x,y) TerminateProcess(OpenProcess(0,0,(DWORD)x),y)
 #else
+#include 
 #include 
 #include 
 #endif
diff --git a/thirdparty/EVPath/EVPath/tests/evtest.c b/thirdparty/EVPath/EVPath/tests/evtest.c
index b13102dbf8..59481adf78 100644
--- a/thirdparty/EVPath/EVPath/tests/evtest.c
+++ b/thirdparty/EVPath/EVPath/tests/evtest.c
@@ -18,6 +18,7 @@
 #define srand48(x)
 #define kill(x,y) TerminateProcess(OpenProcess(0,0,(DWORD)x),y)
 #else
+#include 
 #include 
 #include 
 #endif
diff --git a/thirdparty/dill/dill/.clang-format b/thirdparty/dill/dill/.clang-format
new file mode 100644
index 0000000000..ebaa492bc6
--- /dev/null
+++ b/thirdparty/dill/dill/.clang-format
@@ -0,0 +1,18 @@
+Language:  Cpp
+BasedOnStyle:  Chromium
+BreakBeforeBraces:  Custom
+IndentWidth: 4
+ContinuationIndentWidth: 4
+AccessModifierOffset: -4
+Standard: Cpp11
+ColumnLimit: 80
+AllowAllParametersOfDeclarationOnNextLine: false
+AlwaysBreakAfterReturnType: All
+AlignEscapedNewlines: Right
+AlignAfterOpenBracket: Align
+SortUsingDeclarations: false
+IndentCaseLabels: false
+BraceWrapping:
+  AfterFunction: true
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
diff --git a/thirdparty/ffs/ffs/CMakeLists.txt b/thirdparty/ffs/ffs/CMakeLists.txt
index c9f516988d..3cd2733e9d 100644
--- a/thirdparty/ffs/ffs/CMakeLists.txt
+++ b/thirdparty/ffs/ffs/CMakeLists.txt
@@ -258,6 +258,7 @@ endif()
 CHECK_INCLUDE_FILE(malloc.h HAVE_MALLOC_H)
 CHECK_INCLUDE_FILE(memory.h HAVE_MEMORY_H)
 CHECK_INCLUDE_FILE(netdb.h HAVE_NETDB_H)
+CHECK_INCLUDE_FILE(netinet/in.h HAVE_NETINET_IN_H)
 CHECK_INCLUDE_FILE(sockLib.h HAVE_SOCKLIB_H)
 CHECK_INCLUDE_FILE(stdarg.h STDC_HEADERS)
 CHECK_INCLUDE_FILE(stdlib.h HAVE_STDLIB_H)
diff --git a/thirdparty/ffs/ffs/cmake/BisonFlexSub.cmake b/thirdparty/ffs/ffs/cmake/BisonFlexSub.cmake
index 40e2e275f5..dedcd711c6 100644
--- a/thirdparty/ffs/ffs/cmake/BisonFlexSub.cmake
+++ b/thirdparty/ffs/ffs/cmake/BisonFlexSub.cmake
@@ -1,7 +1,8 @@
 FUNCTION (SETUP_BISON_FLEX_SUB)
 
 IF ((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR
-   (${CMAKE_SYSTEM_NAME} STREQUAL "Linux"))
+   (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR
+   (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD"))
    set (BISON_FLEX_PRECOMPILE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cod/pregen_source/Linux")
 elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
    set (BISON_FLEX_PRECOMPILE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/cod/pregen_source/Windows")
diff --git a/thirdparty/ffs/ffs/ffs/tests/context_test.c b/thirdparty/ffs/ffs/ffs/tests/context_test.c
index 19af8d4442..fe178133c3 100755
--- a/thirdparty/ffs/ffs/ffs/tests/context_test.c
+++ b/thirdparty/ffs/ffs/ffs/tests/context_test.c
@@ -4,6 +4,9 @@
 #ifdef STDC_HEADERS
 #include 
 #endif
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
 #include 
 #ifdef HAVE_UNISTD_H
 #include