From dc789c0ab056a3e63ff49e605651a8aec3676927 Mon Sep 17 00:00:00 2001 From: Norbert Podhorszki Date: Sun, 14 Jan 2024 16:38:57 -0500 Subject: [PATCH 01/19] Work towards reading data with stride - new Variable functions SetStride() and Selection() to set stride and to get back the final selection after setting a stride - 1D example for striding - groundwork towards supporting this in BP5 reader --- bindings/CXX11/adios2/cxx11/Variable.cpp | 13 ++ bindings/CXX11/adios2/cxx11/Variable.h | 15 +- examples/basics/CMakeLists.txt | 2 + examples/basics/stridedRead/CMakeLists.txt | 21 +++ examples/basics/stridedRead/stridedRead1D.cpp | 146 +++++++++++++++++ examples/basics/stridedRead/stridedRead2D.cpp | 150 ++++++++++++++++++ source/adios2/common/ADIOSTypes.cpp | 12 ++ source/adios2/common/ADIOSTypes.h | 12 ++ source/adios2/core/Variable.cpp | 6 + source/adios2/core/Variable.h | 6 +- source/adios2/core/Variable.tcc | 26 ++- source/adios2/core/VariableBase.cpp | 71 ++++++++- source/adios2/core/VariableBase.h | 12 ++ source/adios2/helper/adiosMath.cpp | 69 ++++++++ source/adios2/helper/adiosMath.h | 24 ++- source/adios2/helper/adiosType.cpp | 33 ++++ source/adios2/helper/adiosType.h | 10 ++ .../toolkit/format/bp5/BP5Deserializer.cpp | 135 +++++++++++++++- 18 files changed, 749 insertions(+), 14 deletions(-) create mode 100644 examples/basics/stridedRead/CMakeLists.txt create mode 100644 examples/basics/stridedRead/stridedRead1D.cpp create mode 100644 examples/basics/stridedRead/stridedRead2D.cpp diff --git a/bindings/CXX11/adios2/cxx11/Variable.cpp b/bindings/CXX11/adios2/cxx11/Variable.cpp index 2b432112cf..0cbaabe7a2 100644 --- a/bindings/CXX11/adios2/cxx11/Variable.cpp +++ b/bindings/CXX11/adios2/cxx11/Variable.cpp @@ -85,6 +85,13 @@ namespace adios2 } \ \ template <> \ + void Variable::SetStride(const Dims &stride, const DoubleMatrix &stencil) \ + { \ + helper::CheckForNullptr(m_Variable, "in call to Variable::SetStride"); \ + return m_Variable->SetStride(stride, stencil); \ + } \ + \ + template <> \ size_t Variable::SelectionSize() const \ { \ helper::CheckForNullptr(m_Variable, "in call to Variable::SelectionSize"); \ @@ -154,6 +161,12 @@ namespace adios2 } \ \ template <> \ + Box Variable::Selection() const \ + { \ + helper::CheckForNullptr(m_Variable, "in call to Variable::Selection"); \ + return m_Variable->Selection(); \ + } \ + template <> \ size_t Variable::Steps() const \ { \ helper::CheckForNullptr(m_Variable, "in call to Variable::Steps"); \ diff --git a/bindings/CXX11/adios2/cxx11/Variable.h b/bindings/CXX11/adios2/cxx11/Variable.h index 2dd77adf9a..6ce93b084a 100644 --- a/bindings/CXX11/adios2/cxx11/Variable.h +++ b/bindings/CXX11/adios2/cxx11/Variable.h @@ -229,9 +229,16 @@ class Variable */ void SetAccuracy(const adios2::Accuracy &a); + /** + * Set striding before reading + * @param stride = vector of stride in each dimension + * @param stencil = n-dim matrix for computing each data point + */ + void SetStride(const Dims &stride, const DoubleMatrix &stencil = {}); + /** * Returns the number of elements required for pre-allocation based on - * current count and stepsCount + * current count, stepsCount, and stride * @return elements of type T required for pre-allocation */ size_t SelectionSize() const; @@ -286,6 +293,12 @@ class Variable */ adios2::Dims Count() const; + /** + * Inspects selection with striding but without steps + * @return pair of start, count vector, that is the actual selection + * in global space after applying striding + */ + Box Selection() const; /** * For readRandomAccess mode, inspect the number of available steps * @return available steps diff --git a/examples/basics/CMakeLists.txt b/examples/basics/CMakeLists.txt index 972125c4df..227446362e 100644 --- a/examples/basics/CMakeLists.txt +++ b/examples/basics/CMakeLists.txt @@ -12,3 +12,5 @@ add_subdirectory(localArray) add_subdirectory(queryWorker) add_subdirectory(values) add_subdirectory(variablesShapes) +add_subdirectory(stridedRead) + diff --git a/examples/basics/stridedRead/CMakeLists.txt b/examples/basics/stridedRead/CMakeLists.txt new file mode 100644 index 0000000000..a5cffb5a4e --- /dev/null +++ b/examples/basics/stridedRead/CMakeLists.txt @@ -0,0 +1,21 @@ +#------------------------------------------------------------------------------# +# Distributed under the OSI-approved Apache License, Version 2.0. See +# accompanying file Copyright.txt for details. +#------------------------------------------------------------------------------# + +cmake_minimum_required(VERSION 3.12) +project(ADIOS2BasicsStridedReadExample) + +if(NOT TARGET adios2_core) + set(_components CXX) + find_package(ADIOS2 REQUIRED COMPONENTS ${_components}) +endif() + +add_executable(adios2_basics_stridedRead1D stridedRead1D.cpp) +target_link_libraries(adios2_basics_stridedRead1D adios2::cxx11 adios2_core) +install(TARGETS adios2_basics_variablesShapes RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + +add_executable(adios2_basics_stridedRead2D stridedRead2D.cpp) +target_link_libraries(adios2_basics_stridedRead2D adios2::cxx11 adios2_core) +install(TARGETS adios2_basics_variablesShapes RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + diff --git a/examples/basics/stridedRead/stridedRead1D.cpp b/examples/basics/stridedRead/stridedRead1D.cpp new file mode 100644 index 0000000000..506360b8be --- /dev/null +++ b/examples/basics/stridedRead/stridedRead1D.cpp @@ -0,0 +1,146 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * * stridedRead1D.cpp : example to read from 1D array with a stride + * + * Created on: Jan 10, 2024 + * Author: Norbert Podhorszki + */ + +#include //std::size_t +#include // std::setprecision +#include // std::cout +#include // std::numeric_limits +#include +#include //std::iota +#include //std::exception + +#include + +constexpr double TWOPI = 2.0 * M_PI; + +void writer(adios2::ADIOS &adios, const std::size_t nx) +{ + auto lf_compute = [](const std::size_t nx) -> std::vector { + const double sp = TWOPI / nx; + std::vector array(nx); + for (size_t i = 0; i < nx; ++i) + { + array[i] = cos(i * sp); + } + return array; + }; + + adios2::IO io = adios.DeclareIO("stridedRead1D-writer"); + + const adios2::Dims shape = {static_cast(nx)}; + adios2::Variable varGlobal1D = + io.DefineVariable("global1d", shape, {0}, shape /*, adios2::ConstantDims*/); + + adios2::Variable varLocal1D = + io.DefineVariable("local1d", {}, {}, shape, adios2::ConstantDims); + + adios2::Engine writer = io.Open("stridedRead1D.bp", adios2::Mode::Write); + + const std::vector array = lf_compute(nx); + + writer.BeginStep(); + + // let's write the global array as two separate writes of halfs + varGlobal1D.SetSelection({{0}, {nx / 2}}); + writer.Put(varGlobal1D, array.data()); + varGlobal1D.SetSelection({{nx / 2}, {nx - (nx / 2)}}); + writer.Put(varGlobal1D, array.data() + (nx / 2)); + + writer.Put(varLocal1D, array.data()); + writer.EndStep(); + + writer.Close(); +} + +void reader(adios2::ADIOS &adios) +{ + adios2::IO io = adios.DeclareIO("stridedRead1D-reader"); + adios2::Engine reader = io.Open("stridedRead1D.bp", adios2::Mode::Read); + reader.BeginStep(); + + adios2::DoubleMatrix stencil1D({3}, {0.25, 0.5, 0.25}); + + adios2::Variable varGlobal1D = io.InquireVariable("global1d"); + std::vector global1D; + varGlobal1D.SetSelection({{10}, {varGlobal1D.Shape()[0] - 20}}); + varGlobal1D.SetStride({2}, stencil1D); + size_t sg = varGlobal1D.SelectionSize(); + global1D.resize(sg); + + { + // details about the selection after striding + auto sel = varGlobal1D.Selection(); + std::cout << "Global array selection: size is " << sg << " start = { "; + for (auto s : sel.first) + { + std::cout << s << " "; + } + std::cout << "}, count = { "; + for (auto s : sel.second) + { + std::cout << s << " "; + } + std::cout << "}" << std::endl; + } + + reader.Get(varGlobal1D, global1D); + +#if 0 + adios2::Variable varLocal1D = io.InquireVariable("local1d"); + std::vector local1D; + varLocal1D.SetBlockSelection(0); + double sixteenth = 1.0 / 16; + /*varLocal1D.SetStride({3}, + adios2::DoubleMatrix({5}, {sixteenth, 3 * sixteenth, 8 * sixteenth, + 3 * sixteenth, sixteenth}));*/ + varLocal1D.SetStride({3}); + size_t sl = varLocal1D.SelectionSize(); + std::cout << "Local array selection size is " << sl << std::endl; + local1D.resize(sl); + reader.Get(varLocal1D, local1D); +#endif + + reader.EndStep(); + + std::cout << "Global array read with stride = {" << std::setprecision(2); + for (auto d : global1D) + { + std::cout << d << " "; + } + std::cout << "}" << std::endl; + +#if 0 + std::cout << "Local array read with stride = {" << std::setprecision(2); + for (auto d : local1D) + { + std::cout << d << " "; + } + std::cout << "}" << std::endl; +#endif + + reader.Close(); +} + +int main(int argc, char *argv[]) +{ + try + { + constexpr std::size_t nx = 100; + adios2::ADIOS adios; + writer(adios, nx); + reader(adios); + } + catch (const std::exception &e) + { + std::cout << "ERROR: ADIOS2 exception: " << e.what() << "\n"; + } + + return 0; +} diff --git a/examples/basics/stridedRead/stridedRead2D.cpp b/examples/basics/stridedRead/stridedRead2D.cpp new file mode 100644 index 0000000000..121e5f57be --- /dev/null +++ b/examples/basics/stridedRead/stridedRead2D.cpp @@ -0,0 +1,150 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * * stridedRead2D.cpp : example to read from 2D array with a stride + * + * Created on: Jan 14, 2024 + * Author: Norbert Podhorszki + */ + +#include //std::size_t +#include // std::setprecision +#include // std::cout +#include // std::numeric_limits +#include +#include //std::iota +#include //std::exception + +#include + +constexpr double TWOPI = 2.0 * M_PI; + +void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny) +{ + auto lf_compute = [](const std::size_t nx, const std::size_t ny) -> std::vector { + const double sp = TWOPI / nx; + std::vector array(nx * ny); + size_t pos = 0; + for (size_t i = 0; i < nx; ++i) + { + double c = cos(i * sp); + for (size_t j = 0; j < ny; ++j) + { + array[pos] = c + sin(j * sp); + ++pos; + } + } + return array; + }; + + adios2::IO io = adios.DeclareIO("stridedRead2D-writer"); + + const adios2::Dims shape = {nx, ny}; + adios2::Variable varGlobal2D = + io.DefineVariable("global2d", shape, {0, 0}, shape /*, adios2::ConstantDims*/); + + adios2::Engine writer = io.Open("stridedRead2D.bp", adios2::Mode::Write); + + const std::vector array = lf_compute(nx, ny); + + writer.BeginStep(); + + // let's write the global array as four separate writes of quarters + size_t qx1 = (nx / 2); + size_t qx2 = nx - qx1; + size_t qy1 = (ny / 2); + size_t qy2 = ny - qy1; + + varGlobal2D.SetSelection({{0, 0}, {qx1, qx2}}); + varGlobal2D.SetMemorySelection({{0, 0}, {nx, ny}}); + writer.Put(varGlobal2D, array.data()); + + varGlobal2D.SetSelection({{0, qy1}, {qx1, qy2}}); + varGlobal2D.SetMemorySelection({{0, qy1}, {nx, ny}}); + writer.Put(varGlobal2D, array.data()); + + varGlobal2D.SetSelection({{qx1, 0}, {qx2, qx2}}); + varGlobal2D.SetMemorySelection({{qx1, 0}, {nx, ny}}); + writer.Put(varGlobal2D, array.data()); + + varGlobal2D.SetSelection({{qx1, qy1}, {qx2, qy2}}); + varGlobal2D.SetMemorySelection({{qx1, qy1}, {nx, ny}}); + writer.Put(varGlobal2D, array.data()); + + writer.EndStep(); + + writer.Close(); +} + +constexpr double sixteenth = 1.0 / 16; +constexpr double eighth = 1.0 / 8; +const std::vector M2 = { + 0.0, eighth, 0.0, // 1 + eighth, 4 * eighth, eighth, // 2 + 0.0, eighth, 0.0, // 3 +}; + +void reader(adios2::ADIOS &adios) +{ + adios2::IO io = adios.DeclareIO("stridedRead2D-reader"); + adios2::Engine reader = io.Open("stridedRead2D.bp", adios2::Mode::Read); + reader.BeginStep(); + + adios2::DoubleMatrix stencil2D({3, 3}, M2); + + adios2::Variable varGlobal2D = io.InquireVariable("global2d"); + std::vector global2D; + varGlobal2D.SetSelection( + {{10, 10}, {varGlobal2D.Shape()[0] - 20, varGlobal2D.Shape()[1] - 20}}); + varGlobal2D.SetStride({2, 2}, stencil2D); + size_t sg = varGlobal2D.SelectionSize(); + global2D.resize(sg); + + { + // details about the selection after striding + auto sel = varGlobal2D.Selection(); + std::cout << "Global array selection: size is " << sg << " start = { "; + for (auto s : sel.first) + { + std::cout << s << " "; + } + std::cout << "}, count = { "; + for (auto s : sel.second) + { + std::cout << s << " "; + } + std::cout << "}" << std::endl; + } + + reader.Get(varGlobal2D, global2D); + + reader.EndStep(); + + std::cout << "Global array read with stride = {" << std::setprecision(2); + for (auto d : global2D) + { + std::cout << d << " "; + } + std::cout << "}" << std::endl; + + reader.Close(); +} + +int main(int argc, char *argv[]) +{ + try + { + constexpr std::size_t nx = 100; + constexpr std::size_t ny = 100; + adios2::ADIOS adios; + writer(adios, nx, ny); + reader(adios); + } + catch (const std::exception &e) + { + std::cout << "ERROR: ADIOS2 exception: " << e.what() << "\n"; + } + + return 0; +} diff --git a/source/adios2/common/ADIOSTypes.cpp b/source/adios2/common/ADIOSTypes.cpp index 5fbdb69216..e603250654 100644 --- a/source/adios2/common/ADIOSTypes.cpp +++ b/source/adios2/common/ADIOSTypes.cpp @@ -461,4 +461,16 @@ void PrintMVI(std::ostream &os, const MinVarInfo &mvi) os << std::endl; } +DoubleMatrix::DoubleMatrix(){}; +DoubleMatrix::DoubleMatrix(const Dims &dims, const std::vector &d) : shape(dims), data(d){}; +DoubleMatrix::DoubleMatrix(const Dims &dims, const double *d) : shape(dims) +{ + size_t n = 0; + for (const auto d : dims) + { + n *= d; + } + data = std::vector(d, d + n); +}; + } // end namespace adios2 diff --git a/source/adios2/common/ADIOSTypes.h b/source/adios2/common/ADIOSTypes.h index 5085ccf295..82a9351a12 100644 --- a/source/adios2/common/ADIOSTypes.h +++ b/source/adios2/common/ADIOSTypes.h @@ -21,6 +21,7 @@ #include #include #include +#include //std::accumulate #include #include #include //std::pair @@ -325,6 +326,17 @@ struct Accuracy constexpr double L2_norm = 0.0; constexpr double Linf_norm = std::numeric_limits::infinity(); +/** Reading with stride **/ + +struct DoubleMatrix +{ + Dims shape; + std::vector data; + DoubleMatrix(); + DoubleMatrix(const Dims &dims, const std::vector &d); + DoubleMatrix(const Dims &dims, const double *d); +}; + /** * Return the actual size in bytes of elements of the given type. Returns -1 * for strings. diff --git a/source/adios2/core/Variable.cpp b/source/adios2/core/Variable.cpp index 26f6f11215..ab34b6cf82 100644 --- a/source/adios2/core/Variable.cpp +++ b/source/adios2/core/Variable.cpp @@ -76,6 +76,12 @@ namespace core } \ \ template <> \ + Box Variable::Selection() const \ + { \ + return DoSelection(); \ + } \ + \ + template <> \ std::pair Variable::MinMax(const size_t step) const \ { \ return DoMinMax(step); \ diff --git a/source/adios2/core/Variable.h b/source/adios2/core/Variable.h index 2f829363c2..1af78ed6f2 100644 --- a/source/adios2/core/Variable.h +++ b/source/adios2/core/Variable.h @@ -106,7 +106,9 @@ class Variable : public VariableBase Dims Count() const; - size_t SelectionSize() const; + size_t SelectionSize() const; // selection size with striding AND steps + + Box Selection() const; // selection with striding but without steps std::pair MinMax(const size_t step = adios2::DefaultSizeT) const; @@ -121,6 +123,8 @@ class Variable : public VariableBase size_t DoSelectionSize() const; + Box DoSelection() const; + std::pair DoMinMax(const size_t step) const; std::vector::BPInfo>> DoAllStepsBlocksInfo() const; diff --git a/source/adios2/core/Variable.tcc b/source/adios2/core/Variable.tcc index 85875a8073..647d804c1a 100644 --- a/source/adios2/core/Variable.tcc +++ b/source/adios2/core/Variable.tcc @@ -16,6 +16,8 @@ #include "adios2/core/Engine.h" #include "adios2/helper/adiosFunctions.h" +#include // std::div + namespace adios2 { namespace core @@ -99,7 +101,29 @@ Dims Variable::DoCount() const template size_t Variable::DoSelectionSize() const { - return helper::GetTotalSize(DoCount()) * m_StepsCount; + Box box = DoSelection(); + return helper::GetTotalSize(box.second) * m_StepsCount; +} + +template +Box Variable::DoSelection() const +{ + Dims countWithoutStride = DoCount(); + Dims start; + if (m_Start.empty()) + { + start.resize(countWithoutStride.size(), 0); + } + else + { + start = m_Start; + } + + if (m_Stride.empty()) + { + return Box(m_Start, countWithoutStride); + } + return helper::GetStridedSelection(start, countWithoutStride, m_Stride); } template diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp index bd8058cd49..8d695fb182 100644 --- a/source/adios2/core/VariableBase.cpp +++ b/source/adios2/core/VariableBase.cpp @@ -12,7 +12,10 @@ /// \cond EXCLUDE_FROM_DOXYGEN #include //std::count -#include //std::next +#include +#include //std::next +#include //std::accumulate +#include #include //std::invalid_argument /// \endcond @@ -22,6 +25,7 @@ #include "adios2/core/Variable.h" #include "adios2/helper/adiosFunctions.h" //helper::GetTotalSize #include "adios2/helper/adiosString.h" +#include "adios2/helper/adiosType.h" #include "adios2/operator/OperatorFactory.h" #include "adios2/helper/adiosGPUFunctions.h" @@ -282,6 +286,71 @@ void VariableBase::SetAccuracy(const adios2::Accuracy &a) noexcept adios2::Accuracy VariableBase::GetAccuracy() const noexcept { return m_AccuracyProvided; } adios2::Accuracy VariableBase::GetAccuracyRequested() const noexcept { return m_AccuracyRequested; } +void VariableBase::SetStride(const Dims &stride, const DoubleMatrix &stencil) +{ + m_Stride = stride; + size_t ndim; + if (m_ShapeID == ShapeID::GlobalArray) + { + ndim = m_Shape.size(); + } + else if (m_ShapeID == ShapeID::LocalArray) + { + ndim = stride.size(); // TODO!!! Ho do we get the ndim of a local array? + } + else + { + helper::Throw("Core", "VariableBase", "SetStride", + "Stride is only allowed for arrays"); + } + + if (stencil.shape.empty()) + { + m_StrideStencil = DoubleMatrix({1}, {1.0}); + } + else + { + m_StrideStencil = stencil; + } + + if (m_Stride.size() != ndim) + { + helper::Throw("Core", "VariableBase", "SetStride", + "invalid stride dimensions " + + std::to_string(stride.size()) + + ", must be equal to the shape of the variable"); + } + + if (helper::IsIntegerType(m_Type)) + { + if (m_StrideStencil.shape.size() != 1 || m_StrideStencil.data.size() != 1 || + m_StrideStencil.data[0] != 1) + { + helper::Throw("Core", "VariableBase", "SetStride", + "invalid stencil for an integer data. " + "It must be adios2::DoubleMatrix({1},{1.0}"); + } + } + else if (helper::IsFloatingPointType(m_Type)) + { + double sum = std::accumulate(m_StrideStencil.data.begin(), m_StrideStencil.data.end(), 0.0); + if (!helper::equal_within_ulps(sum, 1.0)) + { + std::ostringstream s; + s << std::fixed << std::setprecision(16) << sum; + helper::Throw("Core", "VariableBase", "SetStride", + "invalid stencil for an floating point data. " + "The sum of elements is " + + s.str() + " must equal to 1.0"); + } + } + else + { + helper::Throw("Core", "VariableBase", "SetStride", + "Stride is not allowed for non-numeric types"); + } +} + size_t VariableBase::GetAvailableStepsStart() const { return m_AvailableStepsStart; } size_t VariableBase::GetAvailableStepsCount() const { return m_AvailableStepsCount; } diff --git a/source/adios2/core/VariableBase.h b/source/adios2/core/VariableBase.h index dc410ec78d..3b2c3b8bff 100644 --- a/source/adios2/core/VariableBase.h +++ b/source/adios2/core/VariableBase.h @@ -107,6 +107,10 @@ class VariableBase /** provided accuracy */ Accuracy m_AccuracyProvided = {0.0, 0.0, false}; + /** stride for reading data */ + Dims m_Stride; // n-dim sizes + DoubleMatrix m_StrideStencil; // how to calculate points in stride + /** Index to Step and blocks' (inside a step) characteristics position in a * serial metadata buffer *
@@ -205,6 +209,14 @@ class VariableBase
     /** Return the requested accuracy set by user with SetAccuracy */
     adios2::Accuracy GetAccuracyRequested() const noexcept;
 
+    /**
+     * Set striding before reading
+     * @param stride = vector of stride in each dimension
+     * @param stencil = n-dim matrix for computing each data point
+     * @param offset = starting point for striding
+     */
+    void SetStride(const Dims &stride, const DoubleMatrix &stencil = {});
+
     size_t GetAvailableStepsStart() const;
 
     size_t GetAvailableStepsCount() const;
diff --git a/source/adios2/helper/adiosMath.cpp b/source/adios2/helper/adiosMath.cpp
index bcf73b6a0e..aa024d076a 100644
--- a/source/adios2/helper/adiosMath.cpp
+++ b/source/adios2/helper/adiosMath.cpp
@@ -503,5 +503,74 @@ Box GetSubBlock(const Dims &count, const BlockDivisionInfo &info,
     return std::make_pair(sbStart, sbCount);
 }
 
+/*/
+Dims GetStridedSelection(const Dims &count, const Dims &stride, const Dims &offset)
+{
+    Dims sel(count.size());
+    for (size_t i = 0; i < count.size(); ++i)
+    {
+        auto d = std::div((long long)(count[i] - offset[i]), (long long)stride[i]);
+        sel[i] = (size_t)d.quot;
+        if (d.rem)
+        {
+            sel[i]++;
+        };
+    }
+    return sel;
+}
+*/
+
+Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &stride,
+                              const Dims &offset)
+{
+    Box box;
+    Dims &st = box.first;
+    Dims &ct = box.second;
+    st.resize(count.size());
+    ct.resize(count.size());
+    for (size_t i = 0; i < count.size(); ++i)
+    {
+        auto d = std::div((long long)(count[i] - offset[i]), (long long)stride[i]);
+        ct[i] = (size_t)d.quot;
+        if (d.rem)
+        {
+            ct[i]++;
+        };
+        st[i] = start[i] + offset[i];
+    }
+    return box;
+}
+
+Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &stride)
+{
+    return GetStridedSelection(start, count, stride, Dims(count.size(), 0ULL));
+}
+
+template 
+bool equal_within_ulps(
+    T x, T y, std::size_t n = 1,
+    typename std::enable_if::is_integer, T>::type * = 0)
+{
+    //  Taken from https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
+
+    // Since `epsilon()` is the gap size (ULP, unit in the last place)
+    // of floating-point numbers in interval [1, 2), we can scale it to
+    // the gap size in interval [2^e, 2^{e+1}), where `e` is the exponent
+    // of `x` and `y`.
+
+    // If `x` and `y` have different gap sizes (which means they have
+    // different exponents), we take the smaller one. Taking the bigger
+    // one is also reasonable, I guess.
+    const T m = std::min(std::fabs(x), std::fabs(y));
+
+    // Subnormal numbers have fixed exponent, which is `min_exponent - 1`.
+    const int exp = m < std::numeric_limits::min() ? std::numeric_limits::min_exponent - 1
+                                                      : std::ilogb(m);
+
+    // We consider `x` and `y` equal if the difference between them is
+    // within `n` ULPs.
+    return std::fabs(x - y) <= n * std::ldexp(std::numeric_limits::epsilon(), exp);
+}
+
 } // end namespace helper
 } // end namespace adios2
diff --git a/source/adios2/helper/adiosMath.h b/source/adios2/helper/adiosMath.h
index 5c55d2eddc..794e929ba3 100644
--- a/source/adios2/helper/adiosMath.h
+++ b/source/adios2/helper/adiosMath.h
@@ -12,13 +12,14 @@
 #define ADIOS2_HELPER_ADIOSMATH_H_
 
 /// \cond EXCLUDE_FROM_DOXYGEN
+#include 
+#include 
+#include 
 #include 
 /// \endcond
 
 #include "adios2/common/ADIOSTypes.h"
 
-#include 
-
 namespace adios2
 {
 namespace helper
@@ -322,6 +323,25 @@ void GetMinMaxSubblocks(const T *values, const Dims &count, const BlockDivisionI
 template 
 T SetWithinLimit(const T value, const T minValue, const T maxValue);
 
+/* Get the dims from a selection and a stride definition*/
+// Dims GetStridedSelection(const Dims &count, const Dims &stride, const Dims &offset);
+
+Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &stride,
+                              const Dims &offset);
+Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &stride);
+
+/** Equality test for floating point numbers using machine epsilon.
+ * See example in https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
+ * @brief Return true if the x and y are within n * of the machine epsilon
+ */
+template 
+bool equal_within_ulps(
+    T x, T y, std::size_t n = 1,
+    typename std::enable_if::is_integer, bool>::type * = 0)
+{
+    return true;
+}
+
 } // end namespace helper
 } // end namespace adios2
 
diff --git a/source/adios2/helper/adiosType.cpp b/source/adios2/helper/adiosType.cpp
index 0a2817734e..605d819040 100644
--- a/source/adios2/helper/adiosType.cpp
+++ b/source/adios2/helper/adiosType.cpp
@@ -105,6 +105,39 @@ size_t GetDataTypeSize(DataType type)
     return 0;
 }
 
+bool IsIntegerType(DataType type) noexcept
+{
+    switch (type)
+    {
+    case DataType::Int8:
+    case DataType::Int16:
+    case DataType::Int32:
+    case DataType::Int64:
+    case DataType::UInt8:
+    case DataType::UInt16:
+    case DataType::UInt32:
+    case DataType::UInt64:
+        return true;
+    default:
+        return false;
+    }
+}
+
+bool IsFloatingPointType(DataType type) noexcept
+{
+    switch (type)
+    {
+    case DataType::Float:
+    case DataType::Double:
+    case DataType::FloatComplex:
+    case DataType::DoubleComplex:
+    case DataType::LongDouble:
+        return true;
+    default:
+        return false;
+    }
+}
+
 std::string DimsToCSV(const Dims &dimensions) noexcept
 {
     std::string dimsCSV;
diff --git a/source/adios2/helper/adiosType.h b/source/adios2/helper/adiosType.h
index 24f360d524..3b6e0c3ceb 100644
--- a/source/adios2/helper/adiosType.h
+++ b/source/adios2/helper/adiosType.h
@@ -194,6 +194,16 @@ DataType GetDataTypeFromString(std::string const &) noexcept;
 
 size_t GetDataTypeSize(DataType type);
 
+/**
+ * @return true if DataType is an integer type
+ */
+bool IsIntegerType(DataType type) noexcept;
+
+/**
+ * @return true if DataType is an floating point type
+ */
+bool IsFloatingPointType(DataType type) noexcept;
+
 /**
  * Converts a vector of dimensions to a CSV string
  * @param dims vector of dimensions
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 0e7574a17c..a5bf1536df 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1579,6 +1579,8 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
                         RR.DirectToAppMemory = false;
                     else if (VarRec->Operator != NULL)
                         RR.DirectToAppMemory = false;
+                    else if (!VB->m_Stride.empty())
+                        RR.DirectToAppMemory = false;
                     else
                         RR.DirectToAppMemory =
                             IsContiguousTransfer(Req, &writer_meta_base->Offsets[StartDim],
@@ -1702,6 +1704,8 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
                             RR.ReadLength = EndOffsetInBlock - StartOffsetInBlock;
                             if (Req->MemSpace != MemorySpace::Host)
                                 RR.DirectToAppMemory = false;
+                            else if (!VB->m_Stride.empty())
+                                RR.DirectToAppMemory = false;
                             else
                                 RR.DirectToAppMemory =
                                     IsContiguousTransfer(Req, &writer_meta_base->Offsets[StartDim],
@@ -1746,6 +1750,85 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
     return Ret;
 }
 
+template 
+void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                 const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                 const CoreDims &outCount, const bool outIsLittleEndian,
+                 MemorySpace MemSpace = MemorySpace::Host)
+{
+}
+
+static inline void StrideCopy(const DataType dtype, const void *in, const CoreDims &inStart,
+                              const CoreDims &inCount, const bool inIsLittleEndian, void *out,
+                              const CoreDims &outStart, const CoreDims &outCount,
+                              const bool outIsLittleEndian,
+                              MemorySpace MemSpace = MemorySpace::Host)
+{
+    switch (dtype)
+    {
+    case DataType::None:
+        break;
+    case DataType::Char:
+    case DataType::Int8:
+        StrideCopyT((int8_t *)in, inStart, inCount, inIsLittleEndian, (int8_t *)out,
+                            outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::Int16:
+        StrideCopyT((int16_t *)in, inStart, inCount, inIsLittleEndian, (int16_t *)out,
+                             outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::Int32:
+        StrideCopyT((int32_t *)in, inStart, inCount, inIsLittleEndian, (int32_t *)out,
+                             outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::Int64:
+        StrideCopyT((int64_t *)in, inStart, inCount, inIsLittleEndian, (int64_t *)out,
+                             outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::UInt8:
+        StrideCopyT((uint8_t *)in, inStart, inCount, inIsLittleEndian, (uint8_t *)out,
+                             outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::UInt16:
+        StrideCopyT((uint16_t *)in, inStart, inCount, inIsLittleEndian, (uint16_t *)out,
+                              outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::UInt32:
+        StrideCopyT((uint32_t *)in, inStart, inCount, inIsLittleEndian, (uint32_t *)out,
+                              outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::UInt64:
+        StrideCopyT((uint64_t *)in, inStart, inCount, inIsLittleEndian, (uint64_t *)out,
+                              outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::Float:
+        StrideCopyT((float *)in, inStart, inCount, inIsLittleEndian, (float *)out, outStart,
+                           outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::Double:
+        StrideCopyT((double *)in, inStart, inCount, inIsLittleEndian, (double *)out,
+                            outStart, outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::LongDouble:
+        StrideCopyT((long double *)in, inStart, inCount, inIsLittleEndian,
+                                 (long double *)out, outStart, outCount, outIsLittleEndian,
+                                 MemSpace);
+        break;
+    case DataType::FloatComplex:
+        StrideCopyT>((std::complex *)in, inStart, inCount,
+                                         inIsLittleEndian, (std::complex *)out, outStart,
+                                         outCount, outIsLittleEndian, MemSpace);
+        break;
+    case DataType::DoubleComplex:
+        StrideCopyT>((std::complex *)in, inStart, inCount,
+                                          inIsLittleEndian, (std::complex *)out, outStart,
+                                          outCount, outIsLittleEndian, MemSpace);
+        break;
+    default:
+        break;
+    }
+}
+
 void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
 {
     auto Req = PendingGetRequests[Read.ReqIndex];
@@ -1770,6 +1853,7 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
     char *IncomingData = Read.DestinationAddr;
     char *VirtualIncomingData = Read.DestinationAddr - Read.OffsetInBlock;
     std::vector decompressBuffer;
+    VariableBase *VB = static_cast(((struct BP5VarRec *)Req.VarRec)->Variable);
     if (((struct BP5VarRec *)Req.VarRec)->Operator != NULL)
     {
         size_t DestSize = ((struct BP5VarRec *)Req.VarRec)->ElementSize;
@@ -1781,7 +1865,7 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
 
         // Get the operator of the variable if exists or create one
         std::shared_ptr op = nullptr;
-        VariableBase *VB = static_cast(((struct BP5VarRec *)Req.VarRec)->Variable);
+
         if (!VB->m_Operations.empty())
         {
             op = VB->m_Operations[0];
@@ -1831,13 +1915,13 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
         }
     }
 
-    VariableBase *VB = static_cast(((struct BP5VarRec *)Req.VarRec)->Variable);
     DimsArray inStart(DimCount, RankOffset);
     DimsArray inCount(DimCount, RankSize);
     DimsArray outStart(DimCount, SelOffset);
     DimsArray outCount(DimCount, SelSize);
     DimsArray outMemStart(VB->m_MemoryStart);
     DimsArray outMemCount(VB->m_MemoryCount);
+
     if (!m_ReaderIsRowMajor)
     {
         std::reverse(inStart.begin(), inStart.end());
@@ -1848,6 +1932,34 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
         std::reverse(outMemCount.begin(), outMemCount.end());
     }
 
+    /* Perform Striding now if needed */
+    char *stridedData = nullptr;
+    bool freeStridedData = false;
+    if (!VB->m_Stride.empty())
+    {
+        char *dataptr = VirtualIncomingData;
+        // TODO: calculate stride offset from the stride and
+        // from the position of this block inside the selection
+        Dims strideOffset(DimCount, 0ULL); // FIXME
+        Box stridedBox =
+            helper::GetStridedSelection(Req.Start, Req.Count, VB->m_Stride, strideOffset);
+        Dims &st = stridedBox.first;
+        Dims &ct = stridedBox.second;
+        size_t nElems = helper::GetTotalSize(stridedBox.second);
+        stridedData = (char *)malloc(nElems * ElementSize);
+        freeStridedData = true;
+
+        StrideCopy(((struct BP5VarRec *)Req.VarRec)->Type, dataptr, inStart, inCount, true,
+                   stridedData, st, ct, true, Req.MemSpace);
+
+        stridedData = VirtualIncomingData;
+        freeStridedData = false;
+    }
+    else
+    {
+        stridedData = VirtualIncomingData;
+    }
+
     if (VB->m_MemoryStart.size() > 0)
     {
 #ifdef NOTDEF // haven't done endinness for BP5
@@ -1872,7 +1984,10 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
         const Box BlockBox = helper::StartEndBox(Dims(inStart.begin(), inStart.end()),
                                                        Dims(inCount.begin(), inCount.end()));
         const Box IntersectionBox = helper::IntersectionBox(selectionBox, BlockBox);
+
+        /* FIXME this for strided data !!!*/
         VirtualIncomingData = Read.DestinationAddr; //  Don't do the fake offset thing
+
         helper::DimsArray intersectStart(IntersectionBox.first);
         helper::DimsArray intersectCount(IntersectionBox.second);
         helper::DimsArray blockStart(BlockBox.first);
@@ -1888,20 +2003,24 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
             intersectStart[d] += VB->m_MemoryStart[d];
             blockStart[d] += VB->m_MemoryStart[d];
         }
-        helper::NdCopy(VirtualIncomingData, intersectStart, intersectCount, true, true,
-                       (char *)Req.Data, intersectStart, intersectCount, true, true, ElementSize,
-                       intersectStart, blockCount, memoryStart, memoryCount, false);
+        helper::NdCopy(stridedData, intersectStart, intersectCount, true, true, (char *)Req.Data,
+                       intersectStart, intersectCount, true, true, ElementSize, intersectStart,
+                       blockCount, memoryStart, memoryCount, false);
     }
     else
     {
-        helper::NdCopy(VirtualIncomingData, inStart, inCount, true, true, (char *)Req.Data,
-                       outStart, outCount, true, true, ElementSize, CoreDims(), CoreDims(),
-                       CoreDims(), CoreDims(), false, Req.MemSpace);
+        helper::NdCopy(stridedData, inStart, inCount, true, true, (char *)Req.Data, outStart,
+                       outCount, true, true, ElementSize, CoreDims(), CoreDims(), CoreDims(),
+                       CoreDims(), false, Req.MemSpace);
     }
     if (freeAddr)
     {
         free((char *)Read.DestinationAddr);
     }
+    if (freeStridedData)
+    {
+        free(stridedData);
+    }
 }
 
 void BP5Deserializer::FinalizeGets(std::vector &Reads)

From 250c39501df79ce510481edce0dff64ae6341a04 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Mon, 15 Jan 2024 15:52:36 -0500
Subject: [PATCH 02/19] Save work: 2D example does work. Need to make
 StrideCopyT handle dimensions.

---
 examples/basics/stridedRead/stridedRead2D.cpp |  39 +++-
 source/adios2/helper/adiosString.cpp          |  25 +++
 source/adios2/helper/adiosString.h            |  13 ++
 .../toolkit/format/bp5/BP5Deserializer.cpp    | 184 +++++++++++++++---
 4 files changed, 228 insertions(+), 33 deletions(-)

diff --git a/examples/basics/stridedRead/stridedRead2D.cpp b/examples/basics/stridedRead/stridedRead2D.cpp
index 121e5f57be..ff99b3214e 100644
--- a/examples/basics/stridedRead/stridedRead2D.cpp
+++ b/examples/basics/stridedRead/stridedRead2D.cpp
@@ -22,7 +22,7 @@ constexpr double TWOPI = 2.0 * M_PI;
 
 void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
 {
-    auto lf_compute = [](const std::size_t nx, const std::size_t ny) -> std::vector {
+    auto lf_computeTrig = [](const std::size_t nx, const std::size_t ny) -> std::vector {
         const double sp = TWOPI / nx;
         std::vector array(nx * ny);
         size_t pos = 0;
@@ -38,6 +38,22 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
         return array;
     };
 
+    auto lf_computeID = [](const std::size_t nx, const std::size_t ny) -> std::vector {
+        std::vector array(nx * ny);
+        size_t pos = 0;
+        for (size_t i = 0; i < nx; ++i)
+        {
+            double c = i * 1.0;
+            for (size_t j = 0; j < ny; ++j)
+            {
+                array[pos] = c;
+                c += 0.01;
+                pos++;
+            }
+        }
+        return array;
+    };
+
     adios2::IO io = adios.DeclareIO("stridedRead2D-writer");
 
     const adios2::Dims shape = {nx, ny};
@@ -46,7 +62,7 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
 
     adios2::Engine writer = io.Open("stridedRead2D.bp", adios2::Mode::Write);
 
-    const std::vector array = lf_compute(nx, ny);
+    const std::vector array = lf_computeID(nx, ny);
 
     writer.BeginStep();
 
@@ -88,6 +104,7 @@ const std::vector M2 = {
 void reader(adios2::ADIOS &adios)
 {
     adios2::IO io = adios.DeclareIO("stridedRead2D-reader");
+    io.SetParameter("Threads", "1");
     adios2::Engine reader = io.Open("stridedRead2D.bp", adios2::Mode::Read);
     reader.BeginStep();
 
@@ -96,14 +113,14 @@ void reader(adios2::ADIOS &adios)
     adios2::Variable varGlobal2D = io.InquireVariable("global2d");
     std::vector global2D;
     varGlobal2D.SetSelection(
-        {{10, 10}, {varGlobal2D.Shape()[0] - 20, varGlobal2D.Shape()[1] - 20}});
-    varGlobal2D.SetStride({2, 2}, stencil2D);
+        {{11, 10}, {varGlobal2D.Shape()[0] - 20, varGlobal2D.Shape()[1] - 20}});
+    varGlobal2D.SetStride({2, 3}, stencil2D);
     size_t sg = varGlobal2D.SelectionSize();
     global2D.resize(sg);
 
+    auto sel = varGlobal2D.Selection();
     {
         // details about the selection after striding
-        auto sel = varGlobal2D.Selection();
         std::cout << "Global array selection: size is " << sg << " start = { ";
         for (auto s : sel.first)
         {
@@ -121,10 +138,16 @@ void reader(adios2::ADIOS &adios)
 
     reader.EndStep();
 
-    std::cout << "Global array read with stride = {" << std::setprecision(2);
-    for (auto d : global2D)
+    std::cout << "Global array read with stride = {\n  ";
+    size_t pos = 0;
+    for (size_t i = 0; i < sel.second[0]; ++i)
     {
-        std::cout << d << " ";
+        // size_t pos = i * sel.second[1];
+        for (size_t j = 0; j < sel.second[1]; ++j)
+        {
+            std::cout << global2D[pos++] << " ";
+        }
+        std::cout << "\n  ";
     }
     std::cout << "}" << std::endl;
 
diff --git a/source/adios2/helper/adiosString.cpp b/source/adios2/helper/adiosString.cpp
index e160899404..3ee9f736ea 100644
--- a/source/adios2/helper/adiosString.cpp
+++ b/source/adios2/helper/adiosString.cpp
@@ -361,6 +361,31 @@ std::string DimsToString(const Dims &dimensions)
     return dimensionsString;
 }
 
+std::string DimsToString(const size_t ndim, const std::size_t *dimensions)
+{
+    // std::string dimensionsString("Dims(" + std::to_string(ndim) + "):[");
+    std::string dimensionsString("[");
+
+    for (size_t i = 0; i < ndim; ++i)
+    {
+        dimensionsString += std::to_string(dimensions[i]) + ", ";
+    }
+    dimensionsString.pop_back();
+    dimensionsString.pop_back();
+    dimensionsString += "]";
+    return dimensionsString;
+}
+
+std::string DimsToString(const CoreDims &dimensions)
+{
+    return DimsToString(dimensions.size(), dimensions.begin());
+}
+
+std::string BoxToString(const Box &box)
+{
+    return "Box(" + DimsToString(box.first) + ", " + DimsToString(box.second) + ")";
+}
+
 Dims StringToDims(const std::string &dimensions)
 {
     std::vector shape;
diff --git a/source/adios2/helper/adiosString.h b/source/adios2/helper/adiosString.h
index cba94dd2b1..4c43318c12 100644
--- a/source/adios2/helper/adiosString.h
+++ b/source/adios2/helper/adiosString.h
@@ -19,6 +19,7 @@
 /// \endcond
 
 #include "adios2/common/ADIOSTypes.h"
+#include "adios2/helper/adiosType.h"
 
 namespace adios2
 {
@@ -131,6 +132,18 @@ std::string DimsToString(const Dims &dimensions);
 
 Dims StringToDims(const std::string &dimensions);
 
+/**
+ * Returns a single string with dimension values
+ * @param ndim input
+ * @param dimensions input
+ * @return string dimensions values
+ */
+std::string DimsToString(const size_t ndim, const std::size_t *dimensions);
+
+std::string DimsToString(const CoreDims &dimensions);
+
+std::string BoxToString(const Box &box);
+
 /**
  * Sets global name: prefix + separator + localName. If prefix is empty it
  * returns the localName as-is.
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index a5bf1536df..0917f0f1f7 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -34,6 +34,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1750,19 +1751,60 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
     return Ret;
 }
 
+template 
+static void print2D(const std::string name, const T *in, const CoreDims &count)
+{
+    std::cout << name << " = {\n  "; // << std::setprecision(2);
+    size_t pos = 0;
+    for (size_t i = 0; i < count[0]; ++i)
+    {
+        for (size_t j = 0; j < count[1]; ++j)
+        {
+            std::cout << in[pos] << " ";
+            ++pos;
+        }
+        std::cout << "\n  ";
+    }
+    std::cout << "}" << std::endl;
+}
+
 template 
 void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
                  const CoreDims &outCount, const bool outIsLittleEndian,
+                 const CoreDims &strideStart, const CoreDims &strideCount,
                  MemorySpace MemSpace = MemorySpace::Host)
 {
+    std::cout << "StrideCopyT: inStart = " << DimsToString(inStart)
+              << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
+              << " outCount = " << DimsToString(outCount)
+              << " strideStart = " << DimsToString(strideStart)
+              << " srideCount = " << DimsToString(strideCount) << std::endl;
+    print2D("Incoming block", in, inCount);
+
+    /* *in points to very first point of data in N-dim space where striding starts */
+    size_t outPos = 0;
+    for (size_t i = 0; i < outCount[0]; ++i)
+    {
+        size_t rowIn = strideStart[0] + i * strideCount[0];
+        size_t inPos = rowIn * inCount[1] + strideStart[1];
+        for (size_t j = 0; j < outCount[1]; ++j)
+        {
+            out[outPos] = in[inPos];
+            ++outPos;
+            inPos += strideCount[1];
+        }
+    }
+
+    print2D("Outgoing block", out, outCount);
+    std::cout << std::endl;
 }
 
 static inline void StrideCopy(const DataType dtype, const void *in, const CoreDims &inStart,
                               const CoreDims &inCount, const bool inIsLittleEndian, void *out,
                               const CoreDims &outStart, const CoreDims &outCount,
-                              const bool outIsLittleEndian,
-                              MemorySpace MemSpace = MemorySpace::Host)
+                              const bool outIsLittleEndian, const CoreDims &strideStart,
+                              const CoreDims &strideCount, MemorySpace MemSpace = MemorySpace::Host)
 {
     switch (dtype)
     {
@@ -1771,58 +1813,69 @@ static inline void StrideCopy(const DataType dtype, const void *in, const CoreDi
     case DataType::Char:
     case DataType::Int8:
         StrideCopyT((int8_t *)in, inStart, inCount, inIsLittleEndian, (int8_t *)out,
-                            outStart, outCount, outIsLittleEndian, MemSpace);
+                            outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                            MemSpace);
         break;
     case DataType::Int16:
         StrideCopyT((int16_t *)in, inStart, inCount, inIsLittleEndian, (int16_t *)out,
-                             outStart, outCount, outIsLittleEndian, MemSpace);
+                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                             MemSpace);
         break;
     case DataType::Int32:
         StrideCopyT((int32_t *)in, inStart, inCount, inIsLittleEndian, (int32_t *)out,
-                             outStart, outCount, outIsLittleEndian, MemSpace);
+                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                             MemSpace);
         break;
     case DataType::Int64:
         StrideCopyT((int64_t *)in, inStart, inCount, inIsLittleEndian, (int64_t *)out,
-                             outStart, outCount, outIsLittleEndian, MemSpace);
+                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                             MemSpace);
         break;
     case DataType::UInt8:
         StrideCopyT((uint8_t *)in, inStart, inCount, inIsLittleEndian, (uint8_t *)out,
-                             outStart, outCount, outIsLittleEndian, MemSpace);
+                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                             MemSpace);
         break;
     case DataType::UInt16:
         StrideCopyT((uint16_t *)in, inStart, inCount, inIsLittleEndian, (uint16_t *)out,
-                              outStart, outCount, outIsLittleEndian, MemSpace);
+                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                              MemSpace);
         break;
     case DataType::UInt32:
         StrideCopyT((uint32_t *)in, inStart, inCount, inIsLittleEndian, (uint32_t *)out,
-                              outStart, outCount, outIsLittleEndian, MemSpace);
+                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                              MemSpace);
         break;
     case DataType::UInt64:
         StrideCopyT((uint64_t *)in, inStart, inCount, inIsLittleEndian, (uint64_t *)out,
-                              outStart, outCount, outIsLittleEndian, MemSpace);
+                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                              MemSpace);
         break;
     case DataType::Float:
         StrideCopyT((float *)in, inStart, inCount, inIsLittleEndian, (float *)out, outStart,
-                           outCount, outIsLittleEndian, MemSpace);
+                           outCount, outIsLittleEndian, strideStart, strideCount, MemSpace);
         break;
     case DataType::Double:
         StrideCopyT((double *)in, inStart, inCount, inIsLittleEndian, (double *)out,
-                            outStart, outCount, outIsLittleEndian, MemSpace);
+                            outStart, outCount, outIsLittleEndian, strideStart, strideCount,
+                            MemSpace);
         break;
     case DataType::LongDouble:
         StrideCopyT((long double *)in, inStart, inCount, inIsLittleEndian,
                                  (long double *)out, outStart, outCount, outIsLittleEndian,
-                                 MemSpace);
+                                 strideStart, strideCount, MemSpace);
         break;
     case DataType::FloatComplex:
         StrideCopyT>((std::complex *)in, inStart, inCount,
                                          inIsLittleEndian, (std::complex *)out, outStart,
-                                         outCount, outIsLittleEndian, MemSpace);
+                                         outCount, outIsLittleEndian, strideStart, strideCount,
+                                         MemSpace);
         break;
     case DataType::DoubleComplex:
         StrideCopyT>((std::complex *)in, inStart, inCount,
                                           inIsLittleEndian, (std::complex *)out, outStart,
-                                          outCount, outIsLittleEndian, MemSpace);
+                                          outCount, outIsLittleEndian, strideStart, strideCount,
+                                          MemSpace);
         break;
     default:
         break;
@@ -1937,23 +1990,98 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
     bool freeStridedData = false;
     if (!VB->m_Stride.empty())
     {
-        char *dataptr = VirtualIncomingData;
-        // TODO: calculate stride offset from the stride and
+        DimsArray strideCount(VB->m_Stride);
+        if (!m_ReaderIsRowMajor)
+        {
+            std::reverse(strideCount.begin(), strideCount.end());
+        }
+
+        const Box selectionBox = helper::StartEndBox(Dims(outStart.begin(), outStart.end()),
+                                                           Dims(outCount.begin(), outCount.end()));
+        const Box BlockBox = helper::StartEndBox(Dims(inStart.begin(), inStart.end()),
+                                                       Dims(inCount.begin(), inCount.end()));
+        // const Box IntersectionBox = helper::IntersectionBox(selectionBox, BlockBox);
+
+        // IntersectionStartCount gives back
+        // - start: offset in global space
+        // - count: the size of box inside the current block
+        // Still need to calculate the stride offset inside the current block
+        const Box IntersectionBox = helper::IntersectionStartCount(
+            Dims(outStart.begin(), outStart.end()), Dims(outCount.begin(), outCount.end()),
+            Dims(inStart.begin(), inStart.end()), Dims(inCount.begin(), inCount.end()));
+
+        std::cout << "selectionBox = " << BoxToString(selectionBox)
+                  << " BlockBox = " << BoxToString(BlockBox)
+                  << " intersectionBox = " << BoxToString(IntersectionBox) << std::endl;
+
+        // calculate stride offset from the stride and
         // from the position of this block inside the selection
         Dims strideOffset(DimCount, 0ULL); // FIXME
-        Box stridedBox =
-            helper::GetStridedSelection(Req.Start, Req.Count, VB->m_Stride, strideOffset);
-        Dims &st = stridedBox.first;
-        Dims &ct = stridedBox.second;
+        for (size_t i = 0; i < DimCount; ++i)
+        {
+            strideOffset[i] = (IntersectionBox.first[i] - outStart[i]) % strideCount[i];
+        }
+        std::cout << "strideOffset = " << DimsToString(strideOffset) << std::endl;
+
+        Box stridedBox = helper::GetStridedSelection(
+            IntersectionBox.first, IntersectionBox.second, VB->m_Stride, strideOffset);
+        DimsArray st(stridedBox.first);
+        DimsArray ct(stridedBox.second);
+
+        // Still need to calculate the offset inside the current block
+        for (size_t i = 0; i < DimCount; ++i)
+        {
+            auto d1 = std::div((long long)(st[i] - outStart[i]), (long long)strideCount[i]);
+            st[i] = (size_t)d1.quot + outStart[i];
+            if (d1.rem)
+            {
+                st[i]++;
+            }
+        }
+
         size_t nElems = helper::GetTotalSize(stridedBox.second);
-        stridedData = (char *)malloc(nElems * ElementSize);
+
+        std::cout << "stridedBox = {" << DimsToString(st) << ", " << DimsToString(ct)
+                  << "}  nElems = " << nElems << std::endl;
+
+        stridedData = (char *)calloc(nElems, ElementSize);
         freeStridedData = true;
 
+        /*
+
+        std::cout << "RankOffset = " << DimsToString(DimCount, RankOffset)
+                  << " RankSize = " << DimsToString(DimCount, RankSize)
+                  << " SelOffset = " << DimsToString(DimCount, SelOffset)
+                  << " SelSize = " << DimsToString(DimCount, SelSize)
+                  << " Req.Start= " << DimsToString(Req.Start)
+                  << " Req.Count = " << DimsToString(Req.Count)
+                  << " Stride = " << DimsToString(VB->m_Stride)
+                  << " strideOffset = " << DimsToString(strideOffset)
+                  << " stride nElems = " << nElems << std::endl;
+
+                std::cout << "inStart = " << DimsToString(inStart) << " inCount = " <<
+           DimsToString(inCount)
+                          << " outStart = " << DimsToString(outStart)
+                          << " outCount = " << DimsToString(outCount) << std::endl;
+        */
+
+        char *dataptr = IncomingData;
         StrideCopy(((struct BP5VarRec *)Req.VarRec)->Type, dataptr, inStart, inCount, true,
-                   stridedData, st, ct, true, Req.MemSpace);
+                   stridedData, st, ct, true, strideOffset, strideCount, Req.MemSpace);
 
-        stridedData = VirtualIncomingData;
-        freeStridedData = false;
+        /* Set the "incoming box" to the strided space instead of the global space
+           for the NdCopy operation */
+        for (size_t i = 0; i < DimCount; ++i)
+        {
+            inStart[i] = st[i];
+            inCount[i] = ct[i];
+            auto d = std::div((long long)(outCount[i]), (long long)strideCount[i]);
+            outCount[i] = (size_t)d.quot;
+            if (d.rem)
+            {
+                outCount[i]++;
+            };
+        }
     }
     else
     {
@@ -2009,6 +2137,12 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
     }
     else
     {
+
+        std::cout << "NdCopy inStart = " << DimsToString(inStart)
+                  << " inCount = " << DimsToString(inCount)
+                  << " outStart = " << DimsToString(outStart)
+                  << " outCount = " << DimsToString(outCount) << "\n"
+                  << std::endl;
         helper::NdCopy(stridedData, inStart, inCount, true, true, (char *)Req.Data, outStart,
                        outCount, true, true, ElementSize, CoreDims(), CoreDims(), CoreDims(),
                        CoreDims(), false, Req.MemSpace);

From c1a0b5b11657bc6e01c191e0f2d62d195a5f04a8 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 17 Jan 2024 15:39:08 -0500
Subject: [PATCH 03/19] Save work: 2D works better now with offsets in blocks
 set correctly. Still no stencil calculation. 1D works too.

---
 examples/basics/stridedRead/stridedRead2D.cpp |  41 +++--
 .../toolkit/format/bp5/BP5Deserializer.cpp    | 154 ++++++++++++++++--
 2 files changed, 171 insertions(+), 24 deletions(-)

diff --git a/examples/basics/stridedRead/stridedRead2D.cpp b/examples/basics/stridedRead/stridedRead2D.cpp
index ff99b3214e..0c4fd4b1ed 100644
--- a/examples/basics/stridedRead/stridedRead2D.cpp
+++ b/examples/basics/stridedRead/stridedRead2D.cpp
@@ -62,7 +62,7 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
 
     adios2::Engine writer = io.Open("stridedRead2D.bp", adios2::Mode::Write);
 
-    const std::vector array = lf_computeID(nx, ny);
+    const std::vector array = lf_computeTrig(nx, ny);
 
     writer.BeginStep();
 
@@ -109,12 +109,13 @@ void reader(adios2::ADIOS &adios)
     reader.BeginStep();
 
     adios2::DoubleMatrix stencil2D({3, 3}, M2);
+    adios2::Dims stride = {2, 3};
 
     adios2::Variable varGlobal2D = io.InquireVariable("global2d");
     std::vector global2D;
     varGlobal2D.SetSelection(
         {{11, 10}, {varGlobal2D.Shape()[0] - 20, varGlobal2D.Shape()[1] - 20}});
-    varGlobal2D.SetStride({2, 3}, stencil2D);
+    varGlobal2D.SetStride(stride, stencil2D);
     size_t sg = varGlobal2D.SelectionSize();
     global2D.resize(sg);
 
@@ -137,21 +138,37 @@ void reader(adios2::ADIOS &adios)
     reader.Get(varGlobal2D, global2D);
 
     reader.EndStep();
+    reader.Close();
 
-    std::cout << "Global array read with stride = {\n  ";
-    size_t pos = 0;
-    for (size_t i = 0; i < sel.second[0]; ++i)
+    // write out the result
     {
-        // size_t pos = i * sel.second[1];
-        for (size_t j = 0; j < sel.second[1]; ++j)
+        adios2::IO io = adios.DeclareIO("stridedRead2D-write-again");
+        std::string outname =
+            "global2d-" + std::to_string(stride[0]) + "-" + std::to_string(stride[1]);
+        adios2::Variable v = io.DefineVariable(outname, sel.second, {0, 0},
+                                                               sel.second, adios2::ConstantDims);
+
+        adios2::Engine writer = io.Open("stridedRead2D.bp", adios2::Mode::Append);
+        writer.BeginStep();
+        writer.Put(v, global2D.data());
+        writer.EndStep();
+        writer.Close();
+
+        /*
+        std::cout << "Global array read with stride = {\n  ";
+        size_t pos = 0;
+        for (size_t i = 0; i < sel.second[0]; ++i)
         {
-            std::cout << global2D[pos++] << " ";
+            // size_t pos = i * sel.second[1];
+            for (size_t j = 0; j < sel.second[1]; ++j)
+            {
+                std::cout << global2D[pos++] << " ";
+            }
+            std::cout << "\n  ";
         }
-        std::cout << "\n  ";
+        std::cout << "}" << std::endl;
+        */
     }
-    std::cout << "}" << std::endl;
-
-    reader.Close();
 }
 
 int main(int argc, char *argv[])
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 0917f0f1f7..0de7658f29 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1751,6 +1751,17 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers, size_t *max
     return Ret;
 }
 
+template 
+static void print1D(const std::string name, const T *in, const CoreDims &count)
+{
+    std::cout << name << " = {\n  "; // << std::setprecision(2);
+    for (size_t i = 0; i < count[0]; ++i)
+    {
+        std::cout << in[i] << " ";
+    }
+    std::cout << "}" << std::endl;
+}
+
 template 
 static void print2D(const std::string name, const T *in, const CoreDims &count)
 {
@@ -1769,20 +1780,65 @@ static void print2D(const std::string name, const T *in, const CoreDims &count)
 }
 
 template 
-void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
-                 const bool inIsLittleEndian, T *out, const CoreDims &outStart,
-                 const CoreDims &outCount, const bool outIsLittleEndian,
-                 const CoreDims &strideStart, const CoreDims &strideCount,
-                 MemorySpace MemSpace = MemorySpace::Host)
+static void print3D(const std::string name, const T *in, const CoreDims &count)
+{
+    std::cout << name << " = {\n"; // << std::setprecision(2);
+    size_t pos = 0;
+    for (size_t i = 0; i < count[0]; ++i)
+    {
+        std::cout << "    {\n    ";
+        for (size_t j = 0; j < count[1]; ++j)
+        {
+            for (size_t k = 0; k < count[2]; ++k)
+            {
+                std::cout << in[pos] << " ";
+                ++pos;
+            }
+        }
+        std::cout << "    }\n";
+    }
+    std::cout << "  }" << std::endl;
+}
+
+template 
+void StrideCopy1D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                  const CoreDims &outCount, const bool outIsLittleEndian,
+                  const CoreDims &strideStart, const CoreDims &strideCount,
+                  MemorySpace MemSpace = MemorySpace::Host)
 {
-    std::cout << "StrideCopyT: inStart = " << DimsToString(inStart)
+    std::cout << "StrideCopy1D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
               << " srideCount = " << DimsToString(strideCount) << std::endl;
-    print2D("Incoming block", in, inCount);
+    print1D("Incoming block", in, inCount);
+
+    size_t inPos = strideStart[0];
+    for (size_t i = 0; i < outCount[0]; ++i)
+    {
+        out[i] = in[inPos];
+        inPos += strideCount[0];
+    }
+
+    print1D("Outgoing block", out, outCount);
+    std::cout << std::endl;
+}
+
+template 
+void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                  const CoreDims &outCount, const bool outIsLittleEndian,
+                  const CoreDims &strideStart, const CoreDims &strideCount,
+                  MemorySpace MemSpace = MemorySpace::Host)
+{
+    /*std::cout << "StrideCopy2D: inStart = " << DimsToString(inStart)
+              << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
+              << " outCount = " << DimsToString(outCount)
+              << " strideStart = " << DimsToString(strideStart)
+              << " srideCount = " << DimsToString(strideCount) << std::endl;*/
+    // print2D("Incoming block", in, inCount);
 
-    /* *in points to very first point of data in N-dim space where striding starts */
     size_t outPos = 0;
     for (size_t i = 0; i < outCount[0]; ++i)
     {
@@ -1796,10 +1852,74 @@ void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         }
     }
 
-    print2D("Outgoing block", out, outCount);
+    // print2D("Outgoing block", out, outCount);
     std::cout << std::endl;
 }
 
+template 
+void StrideCopy3D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                  const CoreDims &outCount, const bool outIsLittleEndian,
+                  const CoreDims &strideStart, const CoreDims &strideCount,
+                  MemorySpace MemSpace = MemorySpace::Host)
+{
+    std::cout << "StrideCopy3D: inStart = " << DimsToString(inStart)
+              << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
+              << " outCount = " << DimsToString(outCount)
+              << " strideStart = " << DimsToString(strideStart)
+              << " srideCount = " << DimsToString(strideCount) << std::endl;
+    print3D("Incoming block", in, inCount);
+
+    /* FIXME: this is not done yet*/
+    size_t outPos = 0;
+    for (size_t i = 0; i < outCount[0]; ++i)
+    {
+        size_t rowIn = strideStart[0] + i * strideCount[0];
+        size_t inPos = rowIn * inCount[1] + strideStart[1];
+        for (size_t j = 0; j < outCount[1]; ++j)
+        {
+            for (size_t k = 0; k < outCount[3]; ++k)
+            {
+                out[outPos] = in[inPos];
+                ++outPos;
+                inPos += strideCount[1];
+            }
+        }
+    }
+
+    print3D("Outgoing block", out, outCount);
+    std::cout << std::endl;
+}
+
+template 
+void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                 const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                 const CoreDims &outCount, const bool outIsLittleEndian,
+                 const CoreDims &strideStart, const CoreDims &strideCount,
+                 MemorySpace MemSpace = MemorySpace::Host)
+{
+    switch (inCount.size())
+    {
+    case 1:
+        return StrideCopy1D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
+                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+        break;
+    case 2:
+        return StrideCopy2D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
+                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+        break;
+    case 3:
+        return StrideCopy3D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
+                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+        break;
+    default:
+        helper::Throw(
+            "Toolkit", "format::bp::BP5Deserializer", "StrideCopyT",
+            "Dimension " + std::to_string(inCount.size()) + "not supported");
+        break;
+    }
+}
+
 static inline void StrideCopy(const DataType dtype, const void *in, const CoreDims &inStart,
                               const CoreDims &inCount, const bool inIsLittleEndian, void *out,
                               const CoreDims &outStart, const CoreDims &outCount,
@@ -2019,7 +2139,17 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
         Dims strideOffset(DimCount, 0ULL); // FIXME
         for (size_t i = 0; i < DimCount; ++i)
         {
-            strideOffset[i] = (IntersectionBox.first[i] - outStart[i]) % strideCount[i];
+            size_t rem = (IntersectionBox.first[i] - outStart[i]) % strideCount[i];
+            // Rem is the number of skipped elements in preceding blocks.
+            // We need to skip in this bock: stride-rem, or 0 if rem is 0
+            if (rem)
+            {
+                strideOffset[i] = strideCount[i] - rem;
+            }
+            else
+            {
+                strideOffset[i] = 0;
+            }
         }
         std::cout << "strideOffset = " << DimsToString(strideOffset) << std::endl;
 
@@ -2138,11 +2268,11 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
     else
     {
 
-        std::cout << "NdCopy inStart = " << DimsToString(inStart)
+        /*std::cout << "NdCopy inStart = " << DimsToString(inStart)
                   << " inCount = " << DimsToString(inCount)
                   << " outStart = " << DimsToString(outStart)
                   << " outCount = " << DimsToString(outCount) << "\n"
-                  << std::endl;
+                  << std::endl;*/
         helper::NdCopy(stridedData, inStart, inCount, true, true, (char *)Req.Data, outStart,
                        outCount, true, true, ElementSize, CoreDims(), CoreDims(), CoreDims(),
                        CoreDims(), false, Req.MemSpace);

From 74c9cd60c7b611b4185d29f75cd69d5f39a51621 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 17 Jan 2024 17:20:02 -0500
Subject: [PATCH 04/19] fix install name of example

---
 examples/basics/stridedRead/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/basics/stridedRead/CMakeLists.txt b/examples/basics/stridedRead/CMakeLists.txt
index a5cffb5a4e..e5daf73424 100644
--- a/examples/basics/stridedRead/CMakeLists.txt
+++ b/examples/basics/stridedRead/CMakeLists.txt
@@ -13,9 +13,9 @@ endif()
 
 add_executable(adios2_basics_stridedRead1D stridedRead1D.cpp)
 target_link_libraries(adios2_basics_stridedRead1D adios2::cxx11 adios2_core)
-install(TARGETS adios2_basics_variablesShapes RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+install(TARGETS adios2_basics_stridedRead1D RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 add_executable(adios2_basics_stridedRead2D stridedRead2D.cpp)
 target_link_libraries(adios2_basics_stridedRead2D adios2::cxx11 adios2_core)
-install(TARGETS adios2_basics_variablesShapes RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+install(TARGETS adios2_basics_stridedRead2D RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 

From 778aba805fa02e60e36d8376bc792b9dd0310b56 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 17 Jan 2024 17:20:16 -0500
Subject: [PATCH 05/19] New basic example readByAccuracy to show MRD on a
 1000x1000 2D array

---
 examples/basics/CMakeLists.txt                |   1 +
 examples/basics/readByAccuracy/CMakeLists.txt |  17 +++
 .../readByAccuracy/readByAccuracy2D.cpp       | 137 ++++++++++++++++++
 source/adios2/helper/adiosString.cpp          |  30 ++++
 source/adios2/helper/adiosString.h            |   5 +
 5 files changed, 190 insertions(+)
 create mode 100644 examples/basics/readByAccuracy/CMakeLists.txt
 create mode 100644 examples/basics/readByAccuracy/readByAccuracy2D.cpp

diff --git a/examples/basics/CMakeLists.txt b/examples/basics/CMakeLists.txt
index 227446362e..5d9c96fc8b 100644
--- a/examples/basics/CMakeLists.txt
+++ b/examples/basics/CMakeLists.txt
@@ -13,4 +13,5 @@ add_subdirectory(queryWorker)
 add_subdirectory(values)
 add_subdirectory(variablesShapes)
 add_subdirectory(stridedRead)
+add_subdirectory(readByAccuracy)
 
diff --git a/examples/basics/readByAccuracy/CMakeLists.txt b/examples/basics/readByAccuracy/CMakeLists.txt
new file mode 100644
index 0000000000..c6940ccd6b
--- /dev/null
+++ b/examples/basics/readByAccuracy/CMakeLists.txt
@@ -0,0 +1,17 @@
+#------------------------------------------------------------------------------#
+# Distributed under the OSI-approved Apache License, Version 2.0.  See
+# accompanying file Copyright.txt for details.
+#------------------------------------------------------------------------------#
+
+cmake_minimum_required(VERSION 3.12)
+project(ADIOS2BasicsReadByAccuracyExample)
+
+if(NOT TARGET adios2_core)
+  set(_components CXX)
+  find_package(ADIOS2 REQUIRED COMPONENTS ${_components})
+endif()
+
+add_executable(adios2_basics_readByAccuracy2D readByAccuracy2D.cpp)
+target_link_libraries(adios2_basics_readByAccuracy2D adios2::cxx11 adios2_core)
+install(TARGETS adios2_basics_readByAccuracy2D RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
diff --git a/examples/basics/readByAccuracy/readByAccuracy2D.cpp b/examples/basics/readByAccuracy/readByAccuracy2D.cpp
new file mode 100644
index 0000000000..0511409b53
--- /dev/null
+++ b/examples/basics/readByAccuracy/readByAccuracy2D.cpp
@@ -0,0 +1,137 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * * readByAccuracy2D.cpp : example to read from 2D array with accuracy set
+ * If MGARD MDR operator is available, it will refactor the data on write
+ * and support read with a user defined error. Otherwise, reading "normal"
+ * data is always fully accurate (error = 0.0)
+ *
+ *  Created on: Jan 17, 2024
+ *      Author: Norbert Podhorszki 
+ */
+
+#include   //std::size_t
+#include   // std::setprecision
+#include  // std::cout
+#include    // std::numeric_limits
+#include 
+#include    //std::iota
+#include  //std::exception
+
+#include "adios2/helper/adiosString.h" // AccuracyToString
+#include 
+
+constexpr double TWOPI = 2.0 * M_PI;
+
+void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
+{
+    auto lf_computeTrig = [](const std::size_t nx, const std::size_t ny) -> std::vector {
+        const double sp = TWOPI / nx;
+        std::vector array(nx * ny);
+        size_t pos = 0;
+        for (size_t i = 0; i < nx; ++i)
+        {
+            double c = cos(i * sp);
+            for (size_t j = 0; j < ny; ++j)
+            {
+                array[pos] = c + sin(j * sp);
+                ++pos;
+            }
+        }
+        return array;
+    };
+
+    adios2::IO io = adios.DeclareIO("readByAccuracy2D-writer");
+
+    const adios2::Dims shape = {nx, ny};
+    adios2::Variable var =
+        io.DefineVariable("data/full", shape, {0, 0}, shape, adios2::ConstantDims);
+
+#ifdef ADIOS2_HAVE_MGARD_MDR1
+    adios2::Operator mgardOp = adios.DefineOperator("mgardCompressor", adios2::ops::MDR);
+    var.AddOperation(mgardOp, {});
+    io.DefineAttribute("operator", "Refactored by adios2::ops::MDR", var.Name());
+#else
+    io.DefineAttribute("operator", "none", var.Name());
+#endif
+
+    const std::vector array = lf_computeTrig(nx, ny);
+
+    adios2::Engine writer = io.Open("readByAccuracy2D.bp", adios2::Mode::Write);
+    writer.BeginStep();
+    writer.Put(var, array.data());
+    writer.EndStep();
+    writer.Close();
+}
+
+const std::vector errors = {1.0, 0.1, 0.001, 0.00001};
+
+void reader(adios2::ADIOS &adios)
+{
+    adios2::IO io = adios.DeclareIO("readByAccuracy2D-reader");
+    io.SetParameter("Threads", "1");
+
+    // read data N times into N vectors so that we can output them later
+    std::vector> arrays(errors.size());
+    std::vector actualAccuracy(errors.size());
+
+    adios2::Dims varShape;
+
+    {
+        adios2::Engine reader = io.Open("readByAccuracy2D.bp", adios2::Mode::Read);
+        reader.BeginStep();
+
+        adios2::Variable varFull = io.InquireVariable("data/full");
+        varShape = varFull.Shape();
+
+        for (size_t i = 0; i < errors.size(); ++i)
+        {
+            adios2::Accuracy requestedAccuracy = {errors[i], adios2::Linf_norm, false};
+            varFull.SetAccuracy(requestedAccuracy);
+            // adios will allocate the vector to fit the data
+            // force reading now, so that we can retrieve the accuracy from 'v'
+            reader.Get(varFull, arrays[i], adios2::Mode::Sync);
+            actualAccuracy[i] = varFull.GetAccuracy();
+        }
+        reader.EndStep();
+        reader.Close();
+    }
+
+    // write out the result
+    {
+        adios2::IO io = adios.DeclareIO("readByAccuracy2D-write-again");
+        adios2::Engine writer = io.Open("readByAccuracy2D.bp", adios2::Mode::Append);
+        writer.BeginStep();
+        for (size_t i = 0; i < errors.size(); ++i)
+        {
+            std::string outname = "data/" + std::to_string(errors[i]);
+            adios2::Variable v =
+                io.DefineVariable(outname, varShape, {0, 0}, varShape);
+            io.DefineAttribute("accuracy", adios2::helper::AccuracyToString(actualAccuracy[i]),
+                               v.Name());
+
+            writer.Put(v, arrays[i].data());
+        }
+        writer.EndStep();
+        writer.Close();
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    try
+    {
+        constexpr std::size_t nx = 1000;
+        constexpr std::size_t ny = 1000;
+        adios2::ADIOS adios;
+        writer(adios, nx, ny);
+        reader(adios);
+    }
+    catch (const std::exception &e)
+    {
+        std::cout << "ERROR: ADIOS2 exception: " << e.what() << "\n";
+    }
+
+    return 0;
+}
diff --git a/source/adios2/helper/adiosString.cpp b/source/adios2/helper/adiosString.cpp
index 3ee9f736ea..2b4b88d6eb 100644
--- a/source/adios2/helper/adiosString.cpp
+++ b/source/adios2/helper/adiosString.cpp
@@ -14,6 +14,7 @@
 #include "adiosType.h" //BytesFactor
 
 /// \cond EXCLUDE_FROM_DOXYGEN
+#include  // isinf()
 #include 
 #include  //std::ios_base::failure
 #include 
@@ -509,5 +510,34 @@ std::string RemoveTrailingSlash(const std::string &name) noexcept
     return name.substr(0, len);
 }
 
+std::string AccuracyToString(const adios2::Accuracy &a) noexcept
+{
+    // std::string dimensionsString("Dims(" + std::to_string(ndim) + "):[");
+    std::string str = "Accuracy{error = " + std::to_string(a.error) + ", norm = ";
+
+    if (a.norm == L2_norm)
+    {
+        str += "L2_norm";
+    }
+    else if (std::isinf(a.norm))
+    {
+        str += "Linf_norm";
+    }
+    else if (std::isnan(a.norm))
+    {
+        str += "nan";
+    }
+    else
+    {
+        str += std::to_string(a.norm);
+    }
+
+    str += ", ";
+    str += (a.relative ? "rel" : "abs");
+    str += "}";
+
+    return str;
+}
+
 } // end namespace helper
 } // end namespace adios2
diff --git a/source/adios2/helper/adiosString.h b/source/adios2/helper/adiosString.h
index 4c43318c12..1f0873d5ac 100644
--- a/source/adios2/helper/adiosString.h
+++ b/source/adios2/helper/adiosString.h
@@ -206,6 +206,11 @@ std::set PrefixMatches(const std::string &prefix,
  */
 std::string RemoveTrailingSlash(const std::string &name) noexcept;
 
+/**
+ * Return a string from an adios2::Accuracy object
+ */
+std::string AccuracyToString(const adios2::Accuracy &a) noexcept;
+
 } // end namespace helper
 } // end namespace adios2
 

From 30c93d859b261e0eb62e1b508d19f74bfbc083d2 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Thu, 18 Jan 2024 10:48:58 -0500
Subject: [PATCH 06/19] bpls: added --stride argument

---
 source/utils/bpls/bpls.cpp | 100 ++++++++++++++++++++++++++++++++-----
 source/utils/bpls/bpls.h   |   2 +-
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/source/utils/bpls/bpls.cpp b/source/utils/bpls/bpls.cpp
index 0d3ce8e73c..9f392d752d 100644
--- a/source/utils/bpls/bpls.cpp
+++ b/source/utils/bpls/bpls.cpp
@@ -84,6 +84,7 @@ std::string transport_params; // Transport parameters (e.g. "Library=stdio,verbo
 std::string engine_name;      // Engine name (e.g. "BP5")
 std::string engine_params;    // Engine parameters (e.g. "SelectSteps=0:5:2")
 std::string accuracy_def;     // Accuracy definition (e.g. "accuracy="0.0,0.0,rel")
+std::string stride;           // Stride definition (e.g. --stride "2,3")
 
 // Flags from arguments or defaults
 bool dump; // dump data not just list info(flag == 1)
@@ -110,6 +111,7 @@ bool accuracyWasSet = false;
 char *prgname; /* argv[0] */
 // long timefrom, timeto;
 int64_t istart[MAX_DIMS], icount[MAX_DIMS]; // negative values are allowed
+int64_t istride[MAX_DIMS];                  // negative values are NOT allowed
 int ndimsspecified = 0;
 #ifdef USE_C_REGEX
 regex_t varregex[MAX_MASKS]; // compiled regular expressions of varmask
@@ -160,6 +162,8 @@ void display_help()
            "  --count     | -c \"spec\"    Number of elements in each dimension\n"
            "                               -1 denotes 'until end' of dimension\n"
            "                               (default is -1 for all dimensions)\n"
+           "  --stride \"spec\"            Stride with number of elements in each dimension\n"
+           "                               Steps are not included in this specification!\n"
            "  --noindex   | -y           Print data without array indices\n"
            "  --string    | -S           Print 8bit integer arrays as strings\n"
            "  --columns   | -n \"cols\"    Number of data elements per row to "
@@ -614,6 +618,8 @@ int bplsMain(int argc, char *argv[])
                     "denotes 'until end' of dimension. default is -1 for all "
                     "dimensions");
     arg.AddArgument("-c", argT::SPACE_ARGUMENT, &count, "");
+    arg.AddArgument("--stride", argT::SPACE_ARGUMENT, &stride,
+                    "            Stride with number of elements in each dimension.");
     arg.AddBooleanArgument("--noindex", &noindex, " | -y Print data without array indices");
     arg.AddBooleanArgument("-y", &noindex, "");
     arg.AddBooleanArgument("--timestep", ×tep, " | -t Print values of timestep elements");
@@ -695,6 +701,7 @@ int bplsMain(int argc, char *argv[])
     /* Process dimension specifications */
     parseDimSpec(start, istart);
     parseDimSpec(count, icount);
+    parseDimSpec(stride, istride, false);
 
     // process the regular expressions
     if (use_regexp)
@@ -783,6 +790,7 @@ void init_globals()
     {
         istart[i] = 0LL;
         icount[i] = -1LL; // read full var by default
+        istride[i] = 1LL;
     }
     ndimsspecified = 0;
 }
@@ -832,6 +840,11 @@ void printSettings(void)
         PRINT_DIMS_INT64("  count", icount, ndimsspecified, i);
         printf("\n");
     }
+    if (stride.size())
+    {
+        PRINT_DIMS_INT64("  stride", istride, ndimsspecified, i);
+        printf("\n");
+    }
 
     if (longopt)
         printf("      -l : show scalar values and min/max/avg of arrays\n");
@@ -1932,6 +1945,7 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
         count_t[MAX_DIMS]; // processed <0 values in start/count
     uint64_t s[MAX_DIMS],
         c[MAX_DIMS]; // for block reading of smaller chunks
+    uint64_t c_stride[MAX_DIMS];
     int tdims;       // number of dimensions including time
     int tidx;        // 0 or 1 to account for time dimension
     uint64_t nelems; // number of elements to read
@@ -1944,6 +1958,7 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
     uint64_t actualreadn;     // our decision how much to read at once
     uint64_t readn[MAX_DIMS]; // how big chunk to read in in each dimension?
     int ndigits_dims[32];     // # of digits (to print) of each dimension
+    adios2::Dims strideDims;  // filled if striding is specified
 
     const size_t elemsize = variable->m_ElementSize;
     const int nsteps = (timestep ? 1 : static_cast(variable->GetAvailableStepsCount()));
@@ -2078,6 +2093,15 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
                    start_t[j + tidx], j + tidx, count_t[j + tidx], nelems);
     }
 
+    if (stride.size())
+    {
+
+        for (j = 0; j < ndim; j++)
+        {
+            strideDims.push_back(static_cast(istride[j]));
+        }
+    }
+
     if (verbose > 1)
     {
         printf(" total size of data to read = %" PRIu64 "\n", nelems * elemsize);
@@ -2093,16 +2117,37 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
     if (xmlprint && nelems > maxreadn)
         maxreadn = nelems;
 
-    // special case: string. Need to use different elemsize
-    /*if (vi->type == DataType::String)
-    {
-        if (vi->value)
-            elemsize = strlen(vi->value) + 1;
-        maxreadn = elemsize;
-    }*/
+        // special case: string. Need to use different elemsize
+        /*if (vi->type == DataType::String)
+        {
+            if (vi->value)
+                elemsize = strlen(vi->value) + 1;
+            maxreadn = elemsize;
+        }*/
 
-    // allocate data array
-    // data = (T *)malloc(maxreadn * elemsize);
+        // allocate data array
+        // data = (T *)malloc(maxreadn * elemsize);
+
+        // If stride is specified, we need to update count_t now
+#if 0
+    if (!strideDims.empty() && variable->m_ShapeID == ShapeID::GlobalArray)
+    {
+        const Dims sv = helper::Uint64ArrayToSizetVector(tdims - tidx, start_t + tidx);
+        const Dims cv = helper::Uint64ArrayToSizetVector(tdims - tidx, count_t + tidx);
+        variable->SetSelection({sv, cv});
+        variable->SetStride(strideDims);
+        auto sel = variable->Selection();
+        printf("sv = %s cv = %s sel.first = %s sel.second = %s count_t={",
+               helper::DimsToString(sv).c_str(), helper::DimsToString(cv).c_str(),
+               helper::DimsToString(sel.first).c_str(), helper::DimsToString(sel.second).c_str());
+        for (j = tidx; j < tdims; j++)
+        {
+            // count_t[j] = sel.second[j - tidx];
+            printf("%" PRIu64 " ", count_t[j]);
+        }
+        printf("}\n");
+    }
+#endif
 
     // determine strategy how to read in:
     //  - at once
@@ -2121,7 +2166,7 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
         }
         else
         {
-            readn[i] = maxreadn / (int)sum; // sum is small for 4 bytes here
+            readn[i] = maxreadn / sum;
             // this may be over the max count for this dimension
             if (readn[i] > count_t[i])
                 readn[i] = count_t[i];
@@ -2141,6 +2186,7 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
     {
         s[j] = start_t[j];
         c[j] = readn[j];
+        c_stride[j] = c[j];
 
         ndigits_dims[j] =
             ndigits(start_t[j] + count_t[j] - 1); // -1: dim=100 results in 2 digits (0..99)
@@ -2183,6 +2229,27 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
         if (variable->m_ShapeID == ShapeID::GlobalArray)
         {
             variable->SetSelection({startv, countv});
+            if (!strideDims.empty())
+            {
+                variable->SetStride(strideDims);
+                auto sel = variable->Selection();
+                // printf("startv = %s countv = %s sel.first = %s sel.second = %s c_stride={",
+                //        helper::DimsToString(startv).c_str(),
+                //        helper::DimsToString(countv).c_str(),
+                //        helper::DimsToString(sel.first).c_str(),
+                //        helper::DimsToString(sel.second).c_str());
+                for (j = 0; j < tdims; j++)
+                {
+                    c_stride[j] = sel.second[j];
+                    // printf("%" PRIu64 " ", c_stride[j]);
+                }
+                // printf("}\n");
+            }
+            /*else
+            {
+                printf("startv = %s countv = %s \n", helper::DimsToString(startv).c_str(),
+                       helper::DimsToString(countv).c_str());
+            }*/
         }
 
         if (tidx)
@@ -2198,9 +2265,8 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
 
         dataV.resize(variable->SelectionSize());
         fp->Get(*variable, dataV, adios2::Mode::Sync);
-
         // print slice
-        print_dataset(dataV.data(), variable->m_Type, s, c, tdims, ndigits_dims);
+        print_dataset(dataV.data(), variable->m_Type, s, c_stride, tdims, ndigits_dims);
 
         // prepare for next read
         sum += actualreadn;
@@ -3896,7 +3962,7 @@ int parseAccuracy()
 // parse a string "0, 3; 027" into an integer array
 // of [0,3,27]
 // exits if parsing failes
-void parseDimSpec(const std::string &str, int64_t *dims)
+void parseDimSpec(const std::string &str, int64_t *dims, bool negativeAllowed)
 {
     if (str.empty())
         return;
@@ -3921,6 +3987,14 @@ void parseDimSpec(const std::string &str, int64_t *dims)
                     token, str.c_str());
             exit(200);
         }
+        if (dims[i] < 0 && !negativeAllowed)
+        {
+            fprintf(stderr,
+                    "Error: Negative value is not allowed for this spec: "
+                    "%s from \"%s\"\n",
+                    token, str.c_str());
+            exit(200);
+        }
 
         // get next item
         // token = strtok_r(NULL, " ,;x\t\n", &saveptr);
diff --git a/source/utils/bpls/bpls.h b/source/utils/bpls/bpls.h
index 683c582b4d..b90af5648f 100644
--- a/source/utils/bpls/bpls.h
+++ b/source/utils/bpls/bpls.h
@@ -56,7 +56,7 @@ struct Entry
 char *mystrndup(const char *s, size_t n);
 void init_globals();
 void processDimSpecs();
-void parseDimSpec(const std::string &str, int64_t *dims);
+void parseDimSpec(const std::string &str, int64_t *dims, bool negativeAllowed = true);
 int parseAccuracy();
 int compile_regexp_masks(void);
 void printSettings(void);

From 4e949acd180c2c1a58feaefc041beccb9ba5d462 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Fri, 19 Jan 2024 16:07:50 -0500
Subject: [PATCH 07/19] Save work: initial window for stencil calculation,
 missing calculation of points with window, and window update

---
 source/adios2/core/VariableBase.cpp           |  26 +++-
 .../toolkit/format/bp5/BP5Deserializer.cpp    | 111 ++++++++++++++----
 2 files changed, 109 insertions(+), 28 deletions(-)

diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp
index 8d695fb182..f066e6fa84 100644
--- a/source/adios2/core/VariableBase.cpp
+++ b/source/adios2/core/VariableBase.cpp
@@ -296,7 +296,7 @@ void VariableBase::SetStride(const Dims &stride, const DoubleMatrix &stencil)
     }
     else if (m_ShapeID == ShapeID::LocalArray)
     {
-        ndim = stride.size(); // TODO!!! Ho do we get the ndim of a local array?
+        ndim = stride.size(); // FIXME: Need to get the ndim of a local array?
     }
     else
     {
@@ -306,7 +306,8 @@ void VariableBase::SetStride(const Dims &stride, const DoubleMatrix &stencil)
 
     if (stencil.shape.empty())
     {
-        m_StrideStencil = DoubleMatrix({1}, {1.0});
+        Dims s(ndim, 1ULL);
+        m_StrideStencil = DoubleMatrix(s, {1.0});
     }
     else
     {
@@ -316,11 +317,30 @@ void VariableBase::SetStride(const Dims &stride, const DoubleMatrix &stencil)
     if (m_Stride.size() != ndim)
     {
         helper::Throw("Core", "VariableBase", "SetStride",
-                                             "invalid stride dimensions " +
+                                             "invalid number of stride dimensions " +
                                                  std::to_string(stride.size()) +
                                                  ", must be equal to the shape of the variable");
     }
 
+    if (m_StrideStencil.shape.size() != ndim)
+    {
+        helper::Throw("Core", "VariableBase", "SetStride",
+                                             "invalid number of stencil dimensions " +
+                                                 std::to_string(m_StrideStencil.shape.size()) +
+                                                 ", must be equal to the shape of the variable");
+    }
+
+    for (size_t i = 0; i < ndim; ++i)
+    {
+        if (m_StrideStencil.shape[i] % 2 != 1)
+        {
+            helper::Throw(
+                "Core", "VariableBase", "SetStride",
+                "invalid stencil dimension " + std::to_string(m_StrideStencil.shape[i]) +
+                    ", must be an odd number. The stencil must have a center value");
+        }
+    }
+
     if (helper::IsIntegerType(m_Type))
     {
         if (m_StrideStencil.shape.size() != 1 || m_StrideStencil.data.size() != 1 ||
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 0de7658f29..5ab1f70261 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1805,7 +1805,7 @@ void StrideCopy1D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                   const bool inIsLittleEndian, T *out, const CoreDims &outStart,
                   const CoreDims &outCount, const bool outIsLittleEndian,
                   const CoreDims &strideStart, const CoreDims &strideCount,
-                  MemorySpace MemSpace = MemorySpace::Host)
+                  const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
     std::cout << "StrideCopy1D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
@@ -1830,15 +1830,15 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                   const bool inIsLittleEndian, T *out, const CoreDims &outStart,
                   const CoreDims &outCount, const bool outIsLittleEndian,
                   const CoreDims &strideStart, const CoreDims &strideCount,
-                  MemorySpace MemSpace = MemorySpace::Host)
+                  const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
     /*std::cout << "StrideCopy2D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
               << " srideCount = " << DimsToString(strideCount) << std::endl;*/
-    // print2D("Incoming block", in, inCount);
-
+    print2D("Incoming block", in, inCount);
+#if 0
     size_t outPos = 0;
     for (size_t i = 0; i < outCount[0]; ++i)
     {
@@ -1851,8 +1851,66 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
             inPos += strideCount[1];
         }
     }
+#else
+    print2D("Incoming stencil", stencil.data.data(), CoreDims(stencil.shape));
+    // window of values to calculate with the stencil
+    std::vector window(stencil.shape[0] * stencil.shape[1]);
+    {
+        // initialize window: center value is in[strideStart[]],
+        // the left and top off center is initialized with the same value
+        // to the right/below, resp. if they fall outside of the boundary
+        // (top-left corner) of the input buffer (i.e. no data available)
+
+        // start with last position in data to start filling the stencil backwards
+        const size_t nX = strideStart[0] + (stencil.shape[0] / 2);
+        const size_t nY = strideStart[1] + (stencil.shape[1] / 2);
+        size_t winPos = window.size() - 1;
+
+        size_t idx0 = 0;
+        for (; idx0 <= nX && idx0 < stencil.shape[0]; ++idx0)
+        {
+            // rows within the input array
+            size_t inPos = (nX - idx0) * inCount[1] + nY;
+            size_t idx1 = 0;
+            for (; idx1 <= nY && idx1 < stencil.shape[1]; ++idx1)
+            {
+                // columns within the input array
+                window[winPos] = in[inPos];
+                --winPos;
+                --inPos;
+            }
+            for (; idx1 < stencil.shape[1]; ++idx1)
+            {
+                // window point left to edge of input, copy value on the right.
+                window[winPos] = window[winPos + 1];
+                --winPos;
+            }
+        }
+        for (; idx0 < stencil.shape[0]; ++idx0)
+        {
+            // rows outside the input array: copy last row of window to this
+            // one
+            memcpy(window.data() + (stencil.shape[0] - idx0 - 1) * stencil.shape[1],
+                   window.data() + (stencil.shape[0] - idx0) * stencil.shape[1],
+                   stencil.shape[1] * sizeof(T));
+        }
+    }
+    print2D("Window initialized", window.data(), CoreDims(stencil.shape));
 
-    // print2D("Outgoing block", out, outCount);
+    size_t outPos = 0;
+    for (size_t i = 0; i < outCount[0]; ++i)
+    {
+        size_t rowIn = strideStart[0] + i * strideCount[0];
+        size_t inPos = rowIn * inCount[1] + strideStart[1];
+        for (size_t j = 0; j < outCount[1]; ++j)
+        {
+            out[outPos] = in[inPos];
+            ++outPos;
+            inPos += strideCount[1];
+        }
+    }
+#endif
+    print2D("Outgoing block", out, outCount);
     std::cout << std::endl;
 }
 
@@ -1861,7 +1919,7 @@ void StrideCopy3D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                   const bool inIsLittleEndian, T *out, const CoreDims &outStart,
                   const CoreDims &outCount, const bool outIsLittleEndian,
                   const CoreDims &strideStart, const CoreDims &strideCount,
-                  MemorySpace MemSpace = MemorySpace::Host)
+                  const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
     std::cout << "StrideCopy3D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
@@ -1896,21 +1954,21 @@ void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
                  const CoreDims &outCount, const bool outIsLittleEndian,
                  const CoreDims &strideStart, const CoreDims &strideCount,
-                 MemorySpace MemSpace = MemorySpace::Host)
+                 const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
     switch (inCount.size())
     {
     case 1:
         return StrideCopy1D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
-                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+                            outIsLittleEndian, strideStart, strideCount, stencil, MemSpace);
         break;
     case 2:
         return StrideCopy2D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
-                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+                            outIsLittleEndian, strideStart, strideCount, stencil, MemSpace);
         break;
     case 3:
         return StrideCopy3D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
-                            outIsLittleEndian, strideStart, strideCount, MemSpace);
+                            outIsLittleEndian, strideStart, strideCount, stencil, MemSpace);
         break;
     default:
         helper::Throw(
@@ -1924,7 +1982,8 @@ static inline void StrideCopy(const DataType dtype, const void *in, const CoreDi
                               const CoreDims &inCount, const bool inIsLittleEndian, void *out,
                               const CoreDims &outStart, const CoreDims &outCount,
                               const bool outIsLittleEndian, const CoreDims &strideStart,
-                              const CoreDims &strideCount, MemorySpace MemSpace = MemorySpace::Host)
+                              const CoreDims &strideCount, const DoubleMatrix &stencil,
+                              MemorySpace MemSpace = MemorySpace::Host)
 {
     switch (dtype)
     {
@@ -1934,68 +1993,69 @@ static inline void StrideCopy(const DataType dtype, const void *in, const CoreDi
     case DataType::Int8:
         StrideCopyT((int8_t *)in, inStart, inCount, inIsLittleEndian, (int8_t *)out,
                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                            MemSpace);
+                            stencil, MemSpace);
         break;
     case DataType::Int16:
         StrideCopyT((int16_t *)in, inStart, inCount, inIsLittleEndian, (int16_t *)out,
                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                             MemSpace);
+                             stencil, MemSpace);
         break;
     case DataType::Int32:
         StrideCopyT((int32_t *)in, inStart, inCount, inIsLittleEndian, (int32_t *)out,
                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                             MemSpace);
+                             stencil, MemSpace);
         break;
     case DataType::Int64:
         StrideCopyT((int64_t *)in, inStart, inCount, inIsLittleEndian, (int64_t *)out,
                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                             MemSpace);
+                             stencil, MemSpace);
         break;
     case DataType::UInt8:
         StrideCopyT((uint8_t *)in, inStart, inCount, inIsLittleEndian, (uint8_t *)out,
                              outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                             MemSpace);
+                             stencil, MemSpace);
         break;
     case DataType::UInt16:
         StrideCopyT((uint16_t *)in, inStart, inCount, inIsLittleEndian, (uint16_t *)out,
                               outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                              MemSpace);
+                              stencil, MemSpace);
         break;
     case DataType::UInt32:
         StrideCopyT((uint32_t *)in, inStart, inCount, inIsLittleEndian, (uint32_t *)out,
                               outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                              MemSpace);
+                              stencil, MemSpace);
         break;
     case DataType::UInt64:
         StrideCopyT((uint64_t *)in, inStart, inCount, inIsLittleEndian, (uint64_t *)out,
                               outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                              MemSpace);
+                              stencil, MemSpace);
         break;
     case DataType::Float:
         StrideCopyT((float *)in, inStart, inCount, inIsLittleEndian, (float *)out, outStart,
-                           outCount, outIsLittleEndian, strideStart, strideCount, MemSpace);
+                           outCount, outIsLittleEndian, strideStart, strideCount, stencil,
+                           MemSpace);
         break;
     case DataType::Double:
         StrideCopyT((double *)in, inStart, inCount, inIsLittleEndian, (double *)out,
                             outStart, outCount, outIsLittleEndian, strideStart, strideCount,
-                            MemSpace);
+                            stencil, MemSpace);
         break;
     case DataType::LongDouble:
         StrideCopyT((long double *)in, inStart, inCount, inIsLittleEndian,
                                  (long double *)out, outStart, outCount, outIsLittleEndian,
-                                 strideStart, strideCount, MemSpace);
+                                 strideStart, strideCount, stencil, MemSpace);
         break;
     case DataType::FloatComplex:
         StrideCopyT>((std::complex *)in, inStart, inCount,
                                          inIsLittleEndian, (std::complex *)out, outStart,
                                          outCount, outIsLittleEndian, strideStart, strideCount,
-                                         MemSpace);
+                                         stencil, MemSpace);
         break;
     case DataType::DoubleComplex:
         StrideCopyT>((std::complex *)in, inStart, inCount,
                                           inIsLittleEndian, (std::complex *)out, outStart,
                                           outCount, outIsLittleEndian, strideStart, strideCount,
-                                          MemSpace);
+                                          stencil, MemSpace);
         break;
     default:
         break;
@@ -2197,7 +2257,8 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
 
         char *dataptr = IncomingData;
         StrideCopy(((struct BP5VarRec *)Req.VarRec)->Type, dataptr, inStart, inCount, true,
-                   stridedData, st, ct, true, strideOffset, strideCount, Req.MemSpace);
+                   stridedData, st, ct, true, strideOffset, strideCount, VB->m_StrideStencil,
+                   Req.MemSpace);
 
         /* Set the "incoming box" to the strided space instead of the global space
            for the NdCopy operation */

From 70375cc021d4f2482cc376431ae32da96077e65e Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Mon, 22 Jan 2024 07:10:42 -0500
Subject: [PATCH 08/19] fix DoubleMatrix constructor from pointe

---
 source/adios2/common/ADIOSTypes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/adios2/common/ADIOSTypes.cpp b/source/adios2/common/ADIOSTypes.cpp
index e603250654..004b094e69 100644
--- a/source/adios2/common/ADIOSTypes.cpp
+++ b/source/adios2/common/ADIOSTypes.cpp
@@ -465,7 +465,7 @@ DoubleMatrix::DoubleMatrix(){};
 DoubleMatrix::DoubleMatrix(const Dims &dims, const std::vector &d) : shape(dims), data(d){};
 DoubleMatrix::DoubleMatrix(const Dims &dims, const double *d) : shape(dims)
 {
-    size_t n = 0;
+    size_t n = 1;
     for (const auto d : dims)
     {
         n *= d;

From c3905b3ee006222acc014ee58938bf3f7a8b967d Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Mon, 22 Jan 2024 07:11:30 -0500
Subject: [PATCH 09/19] Campaign engine, pass on stride/stencil info

---
 source/adios2/engine/campaign/CampaignReader.tcc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/source/adios2/engine/campaign/CampaignReader.tcc b/source/adios2/engine/campaign/CampaignReader.tcc
index d635a1a976..2b1616c774 100644
--- a/source/adios2/engine/campaign/CampaignReader.tcc
+++ b/source/adios2/engine/campaign/CampaignReader.tcc
@@ -68,6 +68,8 @@ CampaignReader::TranslateToActualVariable(Variable &variable)
     v->m_MemoryStart = variable.m_MemoryStart;
     v->m_MemoryCount = variable.m_MemoryCount;
     v->m_MemSpace = variable.m_MemSpace;
+    v->m_Stride = variable.m_Stride;
+    v->m_StrideStencil = variable.m_StrideStencil;
     return std::make_pair(v, e);
 }
 

From a3cc86404e393a9be3766438a11399578f369db3 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Mon, 22 Jan 2024 07:12:41 -0500
Subject: [PATCH 10/19] Add stride/stencil info to remote Get request

---
 source/adios2/engine/bp5/BP5Reader.cpp         |  3 ++-
 .../toolkit/format/bp5/BP5Deserializer.cpp     |  8 ++++++--
 .../toolkit/format/bp5/BP5Deserializer.h       |  2 ++
 source/adios2/toolkit/remote/Remote.cpp        |  6 +++++-
 source/adios2/toolkit/remote/Remote.h          |  3 ++-
 source/adios2/toolkit/remote/remote_common.cpp |  5 +++++
 source/adios2/toolkit/remote/remote_common.h   |  4 ++++
 source/adios2/toolkit/remote/remote_server.cpp | 18 ++++++++++++++++++
 8 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp
index 636f67ace4..12f9f963b1 100644
--- a/source/adios2/engine/bp5/BP5Reader.cpp
+++ b/source/adios2/engine/bp5/BP5Reader.cpp
@@ -294,7 +294,8 @@ void BP5Reader::PerformRemoteGets()
     auto GetRequests = m_BP5Deserializer->PendingGetRequests;
     for (auto &Req : GetRequests)
     {
-        m_Remote.Get(Req.VarName, Req.RelStep, Req.BlockID, Req.Count, Req.Start, Req.Data);
+        m_Remote.Get(Req.VarName, Req.RelStep, Req.BlockID, Req.Count, Req.Start, *(Req.Stride),
+                     *Req.Stencil, Req.Data);
     }
 }
 
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 5ab1f70261..b33be05089 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1421,6 +1421,8 @@ bool BP5Deserializer::QueueGetSingle(core::VariableBase &variable, void *DestDat
         Req.BlockID = (size_t)-1;
         Req.Count = variable.m_Count;
         Req.Start = variable.m_Start;
+        Req.Stride = &(variable.m_Stride);
+        Req.Stencil = &(variable.m_StrideStencil);
         Req.Step = AbsStep;
         Req.RelStep = RelStep;
         Req.MemSpace = MemSpace;
@@ -1439,6 +1441,8 @@ bool BP5Deserializer::QueueGetSingle(core::VariableBase &variable, void *DestDat
         {
             Req.Start = variable.m_Start;
             Req.Count = variable.m_Count;
+            Req.Stride = &(variable.m_Stride);
+            Req.Stencil = &(variable.m_StrideStencil);
         }
         Req.Data = DestData;
         Req.MemSpace = MemSpace;
@@ -1837,7 +1841,7 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
               << " srideCount = " << DimsToString(strideCount) << std::endl;*/
-    print2D("Incoming block", in, inCount);
+    // print2D("Incoming block", in, inCount);
 #if 0
     size_t outPos = 0;
     for (size_t i = 0; i < outCount[0]; ++i)
@@ -1910,7 +1914,7 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         }
     }
 #endif
-    print2D("Outgoing block", out, outCount);
+    // print2D("Outgoing block", out, outCount);
     std::cout << std::endl;
 }
 
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.h b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
index ad6c53fb13..7a9f5a7b74 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.h
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
@@ -105,6 +105,8 @@ class BP5Deserializer : virtual public BP5Base
         Dims Start;
         Dims Count;
         MemorySpace MemSpace;
+        Dims *Stride;
+        DoubleMatrix *Stencil;
         void *Data;
     };
     std::vector PendingGetRequests;
diff --git a/source/adios2/toolkit/remote/Remote.cpp b/source/adios2/toolkit/remote/Remote.cpp
index 29c0da3184..ff2cac0213 100644
--- a/source/adios2/toolkit/remote/Remote.cpp
+++ b/source/adios2/toolkit/remote/Remote.cpp
@@ -148,7 +148,7 @@ void Remote::OpenSimpleFile(const std::string hostname, const int32_t port,
 }
 
 Remote::GetHandle Remote::Get(char *VarName, size_t Step, size_t BlockID, Dims &Count, Dims &Start,
-                              void *dest)
+                              Dims &Stride, DoubleMatrix &Stencil, void *dest)
 {
     RemoteCommon::_GetRequestMsg GetMsg;
     memset(&GetMsg, 0, sizeof(GetMsg));
@@ -160,6 +160,10 @@ Remote::GetHandle Remote::Get(char *VarName, size_t Step, size_t BlockID, Dims &
     GetMsg.DimCount = Count.size();
     GetMsg.Count = Count.data();
     GetMsg.Start = Start.data();
+    GetMsg.Stride = Stride.data();
+    GetMsg.StencilDimCount = Stencil.shape.size();
+    GetMsg.StencilDims = Stencil.shape.data();
+    GetMsg.Stencil = Stencil.data.data();
     GetMsg.Dest = dest;
     CMwrite(m_conn, ev_state.GetRequestFormat, &GetMsg);
     CMCondition_wait(ev_state.cm, GetMsg.GetResponseCondition);
diff --git a/source/adios2/toolkit/remote/Remote.h b/source/adios2/toolkit/remote/Remote.h
index 9e432516fa..4d0f36984f 100644
--- a/source/adios2/toolkit/remote/Remote.h
+++ b/source/adios2/toolkit/remote/Remote.h
@@ -43,7 +43,8 @@ class Remote
 
     typedef int GetHandle;
 
-    GetHandle Get(char *VarName, size_t Step, size_t BlockID, Dims &Count, Dims &Start, void *dest);
+    GetHandle Get(char *VarName, size_t Step, size_t BlockID, Dims &Count, Dims &Start,
+                  Dims &Stride, DoubleMatrix &Stencil, void *dest);
 
     bool WaitForGet(GetHandle handle);
 
diff --git a/source/adios2/toolkit/remote/remote_common.cpp b/source/adios2/toolkit/remote/remote_common.cpp
index bab73b0462..34083f868f 100644
--- a/source/adios2/toolkit/remote/remote_common.cpp
+++ b/source/adios2/toolkit/remote/remote_common.cpp
@@ -61,6 +61,11 @@ FMField GetRequestList[] = {
     {"DimCount", "integer", sizeof(size_t), FMOffset(GetRequestMsg, DimCount)},
     {"Count", "integer[DimCount]", sizeof(size_t), FMOffset(GetRequestMsg, Count)},
     {"Start", "integer[DimCount]", sizeof(size_t), FMOffset(GetRequestMsg, Start)},
+    {"Stride", "integer[DimCount]", sizeof(size_t), FMOffset(GetRequestMsg, Stride)},
+    {"StencilDimCount", "integer", sizeof(size_t), FMOffset(GetRequestMsg, StencilDimCount)},
+    {"StencilDims", "integer[StencilDimCount]", sizeof(size_t),
+     FMOffset(GetRequestMsg, StencilDims)},
+    {"Stencil", "integer[StencilDimCount]", sizeof(double), FMOffset(GetRequestMsg, Stencil)},
     {"Dest", "integer", sizeof(size_t), FMOffset(GetRequestMsg, Dest)},
     {NULL, NULL, 0, 0}};
 
diff --git a/source/adios2/toolkit/remote/remote_common.h b/source/adios2/toolkit/remote/remote_common.h
index 0d78bd290a..d1203fb7f8 100644
--- a/source/adios2/toolkit/remote/remote_common.h
+++ b/source/adios2/toolkit/remote/remote_common.h
@@ -59,6 +59,10 @@ typedef struct _GetRequestMsg
     int DimCount;
     size_t *Count;
     size_t *Start;
+    size_t *Stride;
+    int StencilDimCount;
+    size_t *StencilDims;
+    double *Stencil;
     void *Dest;
 } *GetRequestMsg;
 
diff --git a/source/adios2/toolkit/remote/remote_server.cpp b/source/adios2/toolkit/remote/remote_server.cpp
index 98ff1bcdcd..5da2d89271 100644
--- a/source/adios2/toolkit/remote/remote_server.cpp
+++ b/source/adios2/toolkit/remote/remote_server.cpp
@@ -248,6 +248,22 @@ static void GetRequestHandler(CManager cm, CMConnection conn, void *vevent, void
         }
     }
 
+    Dims stride;
+    DoubleMatrix stencil;
+    if (GetMsg->Stride)
+    {
+        for (int i = 0; i < GetMsg->DimCount; i++)
+        {
+            stride.push_back(GetMsg->Stride[i]);
+        }
+        Dims stencilDims;
+        for (int i = 0; i < GetMsg->StencilDimCount; i++)
+        {
+            stencilDims.push_back(GetMsg->StencilDims[i]);
+        }
+        stencil = DoubleMatrix(stencilDims, GetMsg->Stencil);
+    }
+
     try
     {
         if (TypeOfVar == adios2::DataType::None)
@@ -266,6 +282,8 @@ static void GetRequestHandler(CManager cm, CMConnection conn, void *vevent, void
             var->SetBlockSelection(GetMsg->BlockID);                                               \
         if (GetMsg->Start)                                                                         \
             var->SetSelection(b);                                                                  \
+        if (GetMsg->Stride)                                                                        \
+            var->SetStride(stride, stencil);                                                       \
         f->m_engine->Get(*var, RetData, Mode::Sync);                                               \
         Response.Size = RetData.size() * sizeof(T);                                                \
         Response.ReadData = (char *)RetData.data();                                                \

From 987e2cd16acfc9eafe7fbdc6c9846a8d02d20383 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 24 Jan 2024 09:23:46 -0500
Subject: [PATCH 11/19] Calculate VariableBase::GetTotalSize using striding

---
 source/adios2/core/VariableBase.cpp | 13 ++++++++++++-
 source/adios2/core/VariableBase.h   |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp
index f066e6fa84..e747669cc4 100644
--- a/source/adios2/core/VariableBase.cpp
+++ b/source/adios2/core/VariableBase.cpp
@@ -44,7 +44,18 @@ VariableBase::VariableBase(const std::string &name, const DataType type, const s
     InitShapeType();
 }
 
-size_t VariableBase::TotalSize() const noexcept { return helper::GetTotalSize(m_Count); }
+size_t VariableBase::TotalSize() const noexcept
+{
+    if (m_Stride.empty())
+    {
+        return helper::GetTotalSize(m_Count);
+    }
+    else
+    {
+        auto box = helper::GetStridedSelection(m_Start, m_Count, m_Stride);
+        return helper::GetTotalSize(box.second);
+    }
+}
 
 #if defined(ADIOS2_HAVE_KOKKOS) || defined(ADIOS2_HAVE_GPU_SUPPORT)
 ArrayOrdering VariableBase::GetArrayLayout() { return m_ArrayLayout; }
diff --git a/source/adios2/core/VariableBase.h b/source/adios2/core/VariableBase.h
index 3b2c3b8bff..9ee76b5e78 100644
--- a/source/adios2/core/VariableBase.h
+++ b/source/adios2/core/VariableBase.h
@@ -131,7 +131,7 @@ class VariableBase
     virtual ~VariableBase() = default;
 
     /**
-     * Returns the total number of elements
+     * Returns the total number of elements, using count and striding but without steps
      * @return number of elements
      */
     size_t TotalSize() const noexcept;

From b4b16e07cc39894d95956f77563e922daf3f2507 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 24 Jan 2024 09:24:09 -0500
Subject: [PATCH 12/19] fix time+stride

---
 source/utils/bpls/bpls.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/utils/bpls/bpls.cpp b/source/utils/bpls/bpls.cpp
index 9f392d752d..1e94cd12ff 100644
--- a/source/utils/bpls/bpls.cpp
+++ b/source/utils/bpls/bpls.cpp
@@ -2238,9 +2238,9 @@ int readVar(core::Engine *fp, core::IO *io, core::Variable *variable)
                 //        helper::DimsToString(countv).c_str(),
                 //        helper::DimsToString(sel.first).c_str(),
                 //        helper::DimsToString(sel.second).c_str());
-                for (j = 0; j < tdims; j++)
+                for (j = 0; j < tdims - tidx; j++)
                 {
-                    c_stride[j] = sel.second[j];
+                    c_stride[j + tidx] = sel.second[j];
                     // printf("%" PRIu64 " ", c_stride[j]);
                 }
                 // printf("}\n");

From 26e3fb2ef3f03b7e1ef20375c0f8e1f1d5d5c373 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 24 Jan 2024 09:24:27 -0500
Subject: [PATCH 13/19] hide debug prints

---
 .../toolkit/format/bp5/BP5Deserializer.cpp    | 52 ++++++-------------
 1 file changed, 15 insertions(+), 37 deletions(-)

diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index b33be05089..242f3244c5 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -1811,12 +1811,12 @@ void StrideCopy1D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                   const CoreDims &strideStart, const CoreDims &strideCount,
                   const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
-    std::cout << "StrideCopy1D: inStart = " << DimsToString(inStart)
+    /*std::cout << "StrideCopy1D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
-              << " srideCount = " << DimsToString(strideCount) << std::endl;
-    print1D("Incoming block", in, inCount);
+              << " srideCount = " << DimsToString(strideCount) << std::endl;*/
+    // print1D("Incoming block", in, inCount);
 
     size_t inPos = strideStart[0];
     for (size_t i = 0; i < outCount[0]; ++i)
@@ -1825,8 +1825,7 @@ void StrideCopy1D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         inPos += strideCount[0];
     }
 
-    print1D("Outgoing block", out, outCount);
-    std::cout << std::endl;
+    // print1D("Outgoing block", out, outCount);
 }
 
 template 
@@ -1856,8 +1855,8 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         }
     }
 #else
-    print2D("Incoming stencil", stencil.data.data(), CoreDims(stencil.shape));
-    // window of values to calculate with the stencil
+    // print2D("Incoming stencil", stencil.data.data(), CoreDims(stencil.shape));
+    //  window of values to calculate with the stencil
     std::vector window(stencil.shape[0] * stencil.shape[1]);
     {
         // initialize window: center value is in[strideStart[]],
@@ -1899,7 +1898,7 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                    stencil.shape[1] * sizeof(T));
         }
     }
-    print2D("Window initialized", window.data(), CoreDims(stencil.shape));
+    // print2D("Window initialized", window.data(), CoreDims(stencil.shape));
 
     size_t outPos = 0;
     for (size_t i = 0; i < outCount[0]; ++i)
@@ -1915,7 +1914,6 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
     }
 #endif
     // print2D("Outgoing block", out, outCount);
-    std::cout << std::endl;
 }
 
 template 
@@ -1930,7 +1928,7 @@ void StrideCopy3D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
               << " srideCount = " << DimsToString(strideCount) << std::endl;
-    print3D("Incoming block", in, inCount);
+    // print3D("Incoming block", in, inCount);
 
     /* FIXME: this is not done yet*/
     size_t outPos = 0;
@@ -1949,8 +1947,7 @@ void StrideCopy3D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         }
     }
 
-    print3D("Outgoing block", out, outCount);
-    std::cout << std::endl;
+    // print3D("Outgoing block", out, outCount);
 }
 
 template 
@@ -2194,9 +2191,9 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
             Dims(outStart.begin(), outStart.end()), Dims(outCount.begin(), outCount.end()),
             Dims(inStart.begin(), inStart.end()), Dims(inCount.begin(), inCount.end()));
 
-        std::cout << "selectionBox = " << BoxToString(selectionBox)
+        /*std::cout << "selectionBox = " << BoxToString(selectionBox)
                   << " BlockBox = " << BoxToString(BlockBox)
-                  << " intersectionBox = " << BoxToString(IntersectionBox) << std::endl;
+                  << " intersectionBox = " << BoxToString(IntersectionBox) << std::endl;*/
 
         // calculate stride offset from the stride and
         // from the position of this block inside the selection
@@ -2215,7 +2212,7 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
                 strideOffset[i] = 0;
             }
         }
-        std::cout << "strideOffset = " << DimsToString(strideOffset) << std::endl;
+        /*std::cout << "strideOffset = " << DimsToString(strideOffset) << std::endl;*/
 
         Box stridedBox = helper::GetStridedSelection(
             IntersectionBox.first, IntersectionBox.second, VB->m_Stride, strideOffset);
@@ -2235,30 +2232,11 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
 
         size_t nElems = helper::GetTotalSize(stridedBox.second);
 
-        std::cout << "stridedBox = {" << DimsToString(st) << ", " << DimsToString(ct)
-                  << "}  nElems = " << nElems << std::endl;
+        /*std::cout << "stridedBox = {" << DimsToString(st) << ", " << DimsToString(ct)
+                  << "}  nElems = " << nElems << std::endl;*/
 
         stridedData = (char *)calloc(nElems, ElementSize);
         freeStridedData = true;
-
-        /*
-
-        std::cout << "RankOffset = " << DimsToString(DimCount, RankOffset)
-                  << " RankSize = " << DimsToString(DimCount, RankSize)
-                  << " SelOffset = " << DimsToString(DimCount, SelOffset)
-                  << " SelSize = " << DimsToString(DimCount, SelSize)
-                  << " Req.Start= " << DimsToString(Req.Start)
-                  << " Req.Count = " << DimsToString(Req.Count)
-                  << " Stride = " << DimsToString(VB->m_Stride)
-                  << " strideOffset = " << DimsToString(strideOffset)
-                  << " stride nElems = " << nElems << std::endl;
-
-                std::cout << "inStart = " << DimsToString(inStart) << " inCount = " <<
-           DimsToString(inCount)
-                          << " outStart = " << DimsToString(outStart)
-                          << " outCount = " << DimsToString(outCount) << std::endl;
-        */
-
         char *dataptr = IncomingData;
         StrideCopy(((struct BP5VarRec *)Req.VarRec)->Type, dataptr, inStart, inCount, true,
                    stridedData, st, ct, true, strideOffset, strideCount, VB->m_StrideStencil,
@@ -2336,7 +2314,7 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
         /*std::cout << "NdCopy inStart = " << DimsToString(inStart)
                   << " inCount = " << DimsToString(inCount)
                   << " outStart = " << DimsToString(outStart)
-                  << " outCount = " << DimsToString(outCount) << "\n"
+                  << " outCount = " << DimsToString(outCount) << " data = " << (uint64_t)(Req.Data)
                   << std::endl;*/
         helper::NdCopy(stridedData, inStart, inCount, true, true, (char *)Req.Data, outStart,
                        outCount, true, true, ElementSize, CoreDims(), CoreDims(), CoreDims(),

From 04060539ff49ac3d2605837b752e27f35ba8dedb Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 24 Jan 2024 09:37:35 -0500
Subject: [PATCH 14/19] update bpls -h result

---
 testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt b/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
index 24bd5cd4c4..64fa243c10 100644
--- a/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
+++ b/testing/utils/cwriter/TestUtilsCWriter.bplsh.expected.txt
@@ -21,6 +21,8 @@ The time dimension is the first dimension then.
   --count     | -c "spec"    Number of elements in each dimension
                                -1 denotes 'until end' of dimension
                                (default is -1 for all dimensions)
+  --stride "spec"            Stride with number of elements in each dimension
+                               Steps are not included in this specification!
   --noindex   | -y           Print data without array indices
   --string    | -S           Print 8bit integer arrays as strings
   --columns   | -n "cols"    Number of data elements per row to print

From eb6f869889be93811675e6d0df1ac54533c1b425 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Thu, 25 Jan 2024 10:20:08 -0500
Subject: [PATCH 15/19] remote template magic that windows 2019 compiler does
 not recognize

---
 source/adios2/helper/adiosMath.cpp | 28 ++++++----------------------
 source/adios2/helper/adiosMath.h   |  8 +-------
 2 files changed, 7 insertions(+), 29 deletions(-)

diff --git a/source/adios2/helper/adiosMath.cpp b/source/adios2/helper/adiosMath.cpp
index aa024d076a..427106771e 100644
--- a/source/adios2/helper/adiosMath.cpp
+++ b/source/adios2/helper/adiosMath.cpp
@@ -546,30 +546,14 @@ Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &
     return GetStridedSelection(start, count, stride, Dims(count.size(), 0ULL));
 }
 
-template 
-bool equal_within_ulps(
-    T x, T y, std::size_t n = 1,
-    typename std::enable_if::is_integer, T>::type * = 0)
+bool equal_within_ulps(double x, double y, std::size_t n)
 {
     //  Taken from https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
-
-    // Since `epsilon()` is the gap size (ULP, unit in the last place)
-    // of floating-point numbers in interval [1, 2), we can scale it to
-    // the gap size in interval [2^e, 2^{e+1}), where `e` is the exponent
-    // of `x` and `y`.
-
-    // If `x` and `y` have different gap sizes (which means they have
-    // different exponents), we take the smaller one. Taking the bigger
-    // one is also reasonable, I guess.
-    const T m = std::min(std::fabs(x), std::fabs(y));
-
-    // Subnormal numbers have fixed exponent, which is `min_exponent - 1`.
-    const int exp = m < std::numeric_limits::min() ? std::numeric_limits::min_exponent - 1
-                                                      : std::ilogb(m);
-
-    // We consider `x` and `y` equal if the difference between them is
-    // within `n` ULPs.
-    return std::fabs(x - y) <= n * std::ldexp(std::numeric_limits::epsilon(), exp);
+    const double m = std::min(std::fabs(x), std::fabs(y));
+    const int exp = m < std::numeric_limits::min()
+                        ? std::numeric_limits::min_exponent - 1
+                        : std::ilogb(m);
+    return std::fabs(x - y) <= n * std::ldexp(std::numeric_limits::epsilon(), exp);
 }
 
 } // end namespace helper
diff --git a/source/adios2/helper/adiosMath.h b/source/adios2/helper/adiosMath.h
index 794e929ba3..002edba3f2 100644
--- a/source/adios2/helper/adiosMath.h
+++ b/source/adios2/helper/adiosMath.h
@@ -334,13 +334,7 @@ Box GetStridedSelection(const Dims &start, const Dims &count, const Dims &
  * See example in https://en.cppreference.com/w/cpp/types/numeric_limits/epsilon
  * @brief Return true if the x and y are within n * of the machine epsilon
  */
-template 
-bool equal_within_ulps(
-    T x, T y, std::size_t n = 1,
-    typename std::enable_if::is_integer, bool>::type * = 0)
-{
-    return true;
-}
+bool equal_within_ulps(double x, double y, std::size_t n = 1);
 
 } // end namespace helper
 } // end namespace adios2

From 2d8028b824f01776c206867ad29ea8aaadde7517 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Thu, 25 Jan 2024 10:20:38 -0500
Subject: [PATCH 16/19] fix compiler warnings

---
 examples/basics/stridedRead/stridedRead2D.cpp | 1 -
 source/adios2/core/VariableBase.cpp           | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/basics/stridedRead/stridedRead2D.cpp b/examples/basics/stridedRead/stridedRead2D.cpp
index 0c4fd4b1ed..6fe44459c2 100644
--- a/examples/basics/stridedRead/stridedRead2D.cpp
+++ b/examples/basics/stridedRead/stridedRead2D.cpp
@@ -93,7 +93,6 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
     writer.Close();
 }
 
-constexpr double sixteenth = 1.0 / 16;
 constexpr double eighth = 1.0 / 8;
 const std::vector M2 = {
     0.0,    eighth,     0.0,    // 1
diff --git a/source/adios2/core/VariableBase.cpp b/source/adios2/core/VariableBase.cpp
index e747669cc4..4c1f9d3682 100644
--- a/source/adios2/core/VariableBase.cpp
+++ b/source/adios2/core/VariableBase.cpp
@@ -300,7 +300,7 @@ adios2::Accuracy VariableBase::GetAccuracyRequested() const noexcept { return m_
 void VariableBase::SetStride(const Dims &stride, const DoubleMatrix &stencil)
 {
     m_Stride = stride;
-    size_t ndim;
+    size_t ndim = 0;
     if (m_ShapeID == ShapeID::GlobalArray)
     {
         ndim = m_Shape.size();

From 09cb500bb2ebfc2fbafaedcdba75c9a5f8535f55 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Tue, 13 Feb 2024 06:33:04 -0500
Subject: [PATCH 17/19] Change back to VirtualIncomingData in memory selection
 case to pass all tests, since this case is not yet supported with strides.

---
 source/adios2/toolkit/format/bp5/BP5Deserializer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 242f3244c5..76bfa7eef5 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -2304,9 +2304,9 @@ void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
             intersectStart[d] += VB->m_MemoryStart[d];
             blockStart[d] += VB->m_MemoryStart[d];
         }
-        helper::NdCopy(stridedData, intersectStart, intersectCount, true, true, (char *)Req.Data,
-                       intersectStart, intersectCount, true, true, ElementSize, intersectStart,
-                       blockCount, memoryStart, memoryCount, false);
+        helper::NdCopy(VirtualIncomingData, intersectStart, intersectCount, true, true,
+                       (char *)Req.Data, intersectStart, intersectCount, true, true, ElementSize,
+                       intersectStart, blockCount, memoryStart, memoryCount, false);
     }
     else
     {

From 003243350c3dfe9aacb0b0c865e2daae3b44b978 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Tue, 13 Feb 2024 11:07:04 -0500
Subject: [PATCH 18/19] support striding for 3D and N-dim in general. Example
 for 3D added.

---
 examples/basics/stridedRead/CMakeLists.txt    |   4 +
 examples/basics/stridedRead/stridedRead2D.cpp |  12 +-
 examples/basics/stridedRead/stridedRead3D.cpp | 218 ++++++++++++++++++
 .../toolkit/format/bp5/BP5Deserializer.cpp    | 116 ++++++++--
 4 files changed, 320 insertions(+), 30 deletions(-)
 create mode 100644 examples/basics/stridedRead/stridedRead3D.cpp

diff --git a/examples/basics/stridedRead/CMakeLists.txt b/examples/basics/stridedRead/CMakeLists.txt
index e5daf73424..c6f35581c8 100644
--- a/examples/basics/stridedRead/CMakeLists.txt
+++ b/examples/basics/stridedRead/CMakeLists.txt
@@ -19,3 +19,7 @@ add_executable(adios2_basics_stridedRead2D stridedRead2D.cpp)
 target_link_libraries(adios2_basics_stridedRead2D adios2::cxx11 adios2_core)
 install(TARGETS adios2_basics_stridedRead2D RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
+add_executable(adios2_basics_stridedRead3D stridedRead3D.cpp)
+target_link_libraries(adios2_basics_stridedRead3D adios2::cxx11 adios2_core)
+install(TARGETS adios2_basics_stridedRead3D RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+
diff --git a/examples/basics/stridedRead/stridedRead2D.cpp b/examples/basics/stridedRead/stridedRead2D.cpp
index 6fe44459c2..0053650403 100644
--- a/examples/basics/stridedRead/stridedRead2D.cpp
+++ b/examples/basics/stridedRead/stridedRead2D.cpp
@@ -62,7 +62,7 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
 
     adios2::Engine writer = io.Open("stridedRead2D.bp", adios2::Mode::Write);
 
-    const std::vector array = lf_computeTrig(nx, ny);
+    const std::vector array = lf_computeID(nx, ny);
 
     writer.BeginStep();
 
@@ -72,7 +72,7 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
     size_t qy1 = (ny / 2);
     size_t qy2 = ny - qy1;
 
-    varGlobal2D.SetSelection({{0, 0}, {qx1, qx2}});
+    varGlobal2D.SetSelection({{0, 0}, {qx1, qy1}});
     varGlobal2D.SetMemorySelection({{0, 0}, {nx, ny}});
     writer.Put(varGlobal2D, array.data());
 
@@ -80,7 +80,7 @@ void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny)
     varGlobal2D.SetMemorySelection({{0, qy1}, {nx, ny}});
     writer.Put(varGlobal2D, array.data());
 
-    varGlobal2D.SetSelection({{qx1, 0}, {qx2, qx2}});
+    varGlobal2D.SetSelection({{qx1, 0}, {qx2, qy1}});
     varGlobal2D.SetMemorySelection({{qx1, 0}, {nx, ny}});
     writer.Put(varGlobal2D, array.data());
 
@@ -153,7 +153,6 @@ void reader(adios2::ADIOS &adios)
         writer.EndStep();
         writer.Close();
 
-        /*
         std::cout << "Global array read with stride = {\n  ";
         size_t pos = 0;
         for (size_t i = 0; i < sel.second[0]; ++i)
@@ -166,7 +165,6 @@ void reader(adios2::ADIOS &adios)
             std::cout << "\n  ";
         }
         std::cout << "}" << std::endl;
-        */
     }
 }
 
@@ -174,8 +172,8 @@ int main(int argc, char *argv[])
 {
     try
     {
-        constexpr std::size_t nx = 100;
-        constexpr std::size_t ny = 100;
+        constexpr std::size_t nx = 105;
+        constexpr std::size_t ny = 99;
         adios2::ADIOS adios;
         writer(adios, nx, ny);
         reader(adios);
diff --git a/examples/basics/stridedRead/stridedRead3D.cpp b/examples/basics/stridedRead/stridedRead3D.cpp
new file mode 100644
index 0000000000..f2c5df76db
--- /dev/null
+++ b/examples/basics/stridedRead/stridedRead3D.cpp
@@ -0,0 +1,218 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * * stridedRead3D.cpp : example to read from 3D array with a stride
+ *
+ *  Created on: Feb 13, 2024
+ *      Author: Norbert Podhorszki 
+ */
+
+#include   //std::size_t
+#include   // std::setprecision
+#include  // std::cout
+#include    // std::numeric_limits
+#include 
+#include    //std::iota
+#include  //std::exception
+
+#include 
+
+constexpr double TWOPI = 2.0 * M_PI;
+
+void writer(adios2::ADIOS &adios, const std::size_t nx, const std::size_t ny, const std::size_t nz)
+{
+    auto lf_computeTrig = [](const std::size_t nx, const std::size_t ny,
+                             const std::size_t nz) -> std::vector {
+        const double spx = TWOPI / nx;
+        const double spy = TWOPI / ny;
+        const double spz = TWOPI / nz;
+        std::vector array(nx * ny * nz);
+        size_t pos = 0;
+        for (size_t i = 0; i < nx; ++i)
+        {
+            double c = cos(i * spx);
+            for (size_t j = 0; j < ny; ++j)
+            {
+                c += sin(j * spy);
+                for (size_t k = 0; k < nz; ++k)
+                {
+                    array[pos] = c + sin(k * spz);
+                    ++pos;
+                }
+            }
+        }
+        return array;
+    };
+
+    auto lf_computeID = [](const std::size_t nx, const std::size_t ny,
+                           const std::size_t nz) -> std::vector {
+        std::vector array(nx * ny * nz);
+        size_t pos = 0;
+        for (size_t i = 0; i < nx; ++i)
+        {
+            double ci = i * 1.0;
+            for (size_t j = 0; j < ny; ++j)
+            {
+                double c = ci + j * 0.01;
+                for (size_t k = 0; k < nz; ++k)
+                {
+                    array[pos] = c;
+                    c += 0.0001;
+                    pos++;
+                }
+            }
+        }
+        return array;
+    };
+
+    adios2::IO io = adios.DeclareIO("stridedRead3D-writer");
+
+    const adios2::Dims shape = {nx, ny, nz};
+    adios2::Variable varGlobal3D =
+        io.DefineVariable("global3d", shape, {0, 0, 0}, shape);
+
+    adios2::Engine writer = io.Open("stridedRead3D.bp", adios2::Mode::Write);
+
+    const std::vector array = lf_computeID(nx, ny, nz);
+
+    writer.BeginStep();
+
+    // let's write the global array as eight separate writes
+    size_t qx[2];
+    qx[0] = (nx / 2);
+    qx[1] = nx - qx[0];
+    size_t ox[2] = {0, qx[0]};
+
+    size_t qy[2];
+    qy[0] = (ny / 2);
+    qy[1] = ny - qy[0];
+    size_t oy[2] = {0, qy[0]};
+
+    size_t qz[2];
+    qz[0] = (nz / 2);
+    qz[1] = nz - qz[0];
+    size_t oz[2] = {0, qz[0]};
+
+    for (size_t x = 0; x < 2; ++x)
+    {
+        for (size_t y = 0; y < 2; ++y)
+        {
+            for (size_t z = 0; z < 2; ++z)
+            {
+                varGlobal3D.SetSelection({{ox[x], oy[y], oz[z]}, {qx[x], qy[y], qz[z]}});
+                varGlobal3D.SetMemorySelection({{ox[x], oy[y], oz[z]}, {nx, ny, nz}});
+                writer.Put(varGlobal3D, array.data());
+            }
+        }
+    }
+    writer.EndStep();
+    writer.Close();
+}
+
+constexpr double twelfth = 1.0 / 12.0;
+const std::vector M3 = {
+    0.0,     0.0,         0.0, // 1
+    0.0,     twelfth,     0.0, // 2
+    0.0,     0.0,         0.0, // 3
+
+    0.0,     twelfth,     0.0,     // 1
+    twelfth, 6 * twelfth, twelfth, // 2
+    0.0,     twelfth,     0.0,     // 3
+
+    0.0,     0.0,         0.0, // 1
+    0.0,     twelfth,     0.0, // 2
+    0.0,     0.0,         0.0, // 3
+};
+
+void reader(adios2::ADIOS &adios)
+{
+    adios2::IO io = adios.DeclareIO("stridedRead3D-reader");
+    io.SetParameter("Threads", "1");
+    adios2::Engine reader = io.Open("stridedRead3D.bp", adios2::Mode::Read);
+    reader.BeginStep();
+
+    adios2::DoubleMatrix stencil3D({3, 3, 3}, M3);
+    adios2::Dims stride = {2, 2, 2};
+
+    adios2::Variable varGlobal3D = io.InquireVariable("global3d");
+    std::vector global3D;
+    varGlobal3D.SetSelection(
+        {{3, 1, 2},
+         {varGlobal3D.Shape()[0] - 3, varGlobal3D.Shape()[1] - 1, varGlobal3D.Shape()[2] - 2}});
+    varGlobal3D.SetStride(stride, stencil3D);
+    size_t sg = varGlobal3D.SelectionSize();
+    global3D.resize(sg);
+
+    auto sel = varGlobal3D.Selection();
+    {
+        // details about the selection after striding
+        std::cout << "Global array selection: size is " << sg << " start = { ";
+        for (auto s : sel.first)
+        {
+            std::cout << s << " ";
+        }
+        std::cout << "}, count = { ";
+        for (auto s : sel.second)
+        {
+            std::cout << s << " ";
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    reader.Get(varGlobal3D, global3D);
+
+    reader.EndStep();
+    reader.Close();
+
+    // write out the result
+    {
+        adios2::IO io = adios.DeclareIO("stridedRead3D-write-again");
+        std::string outname = "global3d-" + std::to_string(stride[0]) + "-" +
+                              std::to_string(stride[1]) + "-" + std::to_string(stride[2]);
+        adios2::Variable v = io.DefineVariable(outname, sel.second, {0, 0, 0},
+                                                               sel.second, adios2::ConstantDims);
+
+        adios2::Engine writer = io.Open("stridedRead3D.bp", adios2::Mode::Append);
+        writer.BeginStep();
+        writer.Put(v, global3D.data());
+        writer.EndStep();
+        writer.Close();
+
+        std::cout << "Global array read with stride = {\n  ";
+        size_t pos = 0;
+        for (size_t i = 0; i < sel.second[0]; ++i)
+        {
+            // size_t pos = i * sel.second[1];
+            for (size_t j = 0; j < sel.second[1]; ++j)
+            {
+                for (size_t k = 0; k < sel.second[2]; ++k)
+                {
+                    std::cout << global3D[pos++] << " ";
+                }
+                std::cout << "\n  ";
+            }
+            std::cout << "\n  ";
+        }
+        std::cout << "}" << std::endl;
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    try
+    {
+        constexpr std::size_t nx = 11;
+        constexpr std::size_t ny = 7;
+        constexpr std::size_t nz = 9;
+        adios2::ADIOS adios;
+        writer(adios, nx, ny, nz);
+        reader(adios);
+    }
+    catch (const std::exception &e)
+    {
+        std::cout << "ERROR: ADIOS2 exception: " << e.what() << "\n";
+    }
+
+    return 0;
+}
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
index 76bfa7eef5..df6dab4914 100644
--- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include  // std::accumulate
 #include 
 
 using namespace adios2::helper;
@@ -1786,22 +1787,23 @@ static void print2D(const std::string name, const T *in, const CoreDims &count)
 template 
 static void print3D(const std::string name, const T *in, const CoreDims &count)
 {
-    std::cout << name << " = {\n"; // << std::setprecision(2);
+    std::cout << name << " = \n{"; // << std::setprecision(2);
     size_t pos = 0;
     for (size_t i = 0; i < count[0]; ++i)
     {
-        std::cout << "    {\n    ";
+        std::cout << "\n    {";
         for (size_t j = 0; j < count[1]; ++j)
         {
+            std::cout << "\n        ";
             for (size_t k = 0; k < count[2]; ++k)
             {
                 std::cout << in[pos] << " ";
                 ++pos;
             }
         }
-        std::cout << "    }\n";
+        std::cout << "\n    }";
     }
-    std::cout << "  }" << std::endl;
+    std::cout << "\n}" << std::endl;
 }
 
 template 
@@ -1819,9 +1821,9 @@ void StrideCopy1D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
     // print1D("Incoming block", in, inCount);
 
     size_t inPos = strideStart[0];
-    for (size_t i = 0; i < outCount[0]; ++i)
+    for (size_t z = 0; z < outCount[0]; ++z)
     {
-        out[i] = in[inPos];
+        out[z] = in[inPos];
         inPos += strideCount[0];
     }
 
@@ -1835,12 +1837,12 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                   const CoreDims &strideStart, const CoreDims &strideCount,
                   const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
-    /*std::cout << "StrideCopy2D: inStart = " << DimsToString(inStart)
+    std::cout << "StrideCopy2D: inStart = " << DimsToString(inStart)
               << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
-              << " srideCount = " << DimsToString(strideCount) << std::endl;*/
-    // print2D("Incoming block", in, inCount);
+              << " srideCount = " << DimsToString(strideCount) << std::endl;
+    print2D("Incoming block", in, inCount);
 #if 0
     size_t outPos = 0;
     for (size_t i = 0; i < outCount[0]; ++i)
@@ -1901,11 +1903,11 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
     // print2D("Window initialized", window.data(), CoreDims(stencil.shape));
 
     size_t outPos = 0;
-    for (size_t i = 0; i < outCount[0]; ++i)
+    for (size_t y = 0; y < outCount[0]; ++y)
     {
-        size_t rowIn = strideStart[0] + i * strideCount[0];
+        size_t rowIn = y * strideCount[0] + strideStart[0];
         size_t inPos = rowIn * inCount[1] + strideStart[1];
-        for (size_t j = 0; j < outCount[1]; ++j)
+        for (size_t z = 0; z < outCount[1]; ++z)
         {
             out[outPos] = in[inPos];
             ++outPos;
@@ -1913,7 +1915,7 @@ void StrideCopy2D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
         }
     }
 #endif
-    // print2D("Outgoing block", out, outCount);
+    print2D("Outgoing block", out, outCount);
 }
 
 template 
@@ -1928,23 +1930,89 @@ void StrideCopy3D(const T *in, const CoreDims &inStart, const CoreDims &inCount,
               << " outCount = " << DimsToString(outCount)
               << " strideStart = " << DimsToString(strideStart)
               << " srideCount = " << DimsToString(strideCount) << std::endl;
-    // print3D("Incoming block", in, inCount);
+    print3D("Incoming block", in, inCount);
 
-    /* FIXME: this is not done yet*/
     size_t outPos = 0;
-    for (size_t i = 0; i < outCount[0]; ++i)
+    for (size_t x = 0; x < outCount[0]; ++x)
     {
-        size_t rowIn = strideStart[0] + i * strideCount[0];
-        size_t inPos = rowIn * inCount[1] + strideStart[1];
-        for (size_t j = 0; j < outCount[1]; ++j)
+        size_t inStartX = x * strideCount[0] + strideStart[0];
+        size_t inPosX = inStartX * inCount[1] * inCount[2];
+        std::cout << " Slice " << x << " inStartX = " << inStartX << " inPosX = " << inPosX
+                  << std::endl;
+        for (size_t y = 0; y < outCount[1]; ++y)
         {
-            for (size_t k = 0; k < outCount[3]; ++k)
+            size_t rowIn = y * strideCount[1] + strideStart[1];
+            size_t inPos = inPosX + rowIn * inCount[2] + strideStart[2];
+            for (size_t z = 0; z < outCount[2]; ++z)
             {
                 out[outPos] = in[inPos];
                 ++outPos;
-                inPos += strideCount[1];
+                inPos += strideCount[2];
+            }
+        }
+    }
+
+    print3D("Outgoing block", out, outCount);
+}
+
+template 
+void StrideCopyND(const T *in, const CoreDims &inStart, const CoreDims &inCount,
+                  const bool inIsLittleEndian, T *out, const CoreDims &outStart,
+                  const CoreDims &outCount, const bool outIsLittleEndian,
+                  const CoreDims &strideStart, const CoreDims &strideCount,
+                  const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
+{
+    auto lf_incrementCounter = [&](std::vector &counter, const CoreDims &outCount,
+                                   const size_t ndim) {
+        for (int d = (int)ndim - 1; d >= 0; --d)
+        {
+            if (counter[d] < outCount[d] - 1)
+            {
+                ++counter[d];
+                break;
+            }
+            else
+            {
+                counter[d] = 0;
             }
         }
+    };
+
+    std::cout << "StrideCopyND: inStart = " << DimsToString(inStart)
+              << " inCount = " << DimsToString(inCount) << " outStart = " << DimsToString(outStart)
+              << " outCount = " << DimsToString(outCount)
+              << " strideStart = " << DimsToString(strideStart)
+              << " srideCount = " << DimsToString(strideCount) << std::endl;
+    // print3D("Incoming block", in, inCount);
+
+    size_t outPos = 0;
+    size_t ndim = inCount.size();
+    size_t nTotal = std::accumulate(outCount.begin(), outCount.end(), 1, std::multiplies());
+    std::vector counter(ndim, 0);
+
+    std::vector prodSizes(ndim, 1);
+    for (size_t d = ndim - 1; d > 0; --d)
+    {
+        prodSizes[d - 1] = inCount[d] * prodSizes[d];
+    }
+
+    while (outPos < nTotal)
+    {
+        size_t inPos = 0;
+        for (size_t d = 0; d < ndim - 1; ++d)
+        {
+            size_t inStartX = counter[d] * strideCount[d] + strideStart[d];
+            inPos += inStartX * prodSizes[d];
+        }
+        inPos += strideStart[ndim - 1];
+        for (size_t z = 0; z < outCount[ndim - 1]; ++z)
+        {
+            out[outPos] = in[inPos];
+            ++outPos;
+            inPos += strideCount[ndim - 1];
+        }
+        // increment by one in first N-1 dimensions (leave alone the last dim)
+        lf_incrementCounter(counter, outCount, ndim - 1);
     }
 
     // print3D("Outgoing block", out, outCount);
@@ -1957,7 +2025,9 @@ void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
                  const CoreDims &strideStart, const CoreDims &strideCount,
                  const DoubleMatrix &stencil, MemorySpace MemSpace = MemorySpace::Host)
 {
-    switch (inCount.size())
+    return StrideCopyND(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
+                        outIsLittleEndian, strideStart, strideCount, stencil, MemSpace);
+    /*switch (inCount.size())
     {
     case 1:
         return StrideCopy1D(in, inStart, inCount, inIsLittleEndian, out, outStart, outCount,
@@ -1976,7 +2046,7 @@ void StrideCopyT(const T *in, const CoreDims &inStart, const CoreDims &inCount,
             "Toolkit", "format::bp::BP5Deserializer", "StrideCopyT",
             "Dimension " + std::to_string(inCount.size()) + "not supported");
         break;
-    }
+    }*/
 }
 
 static inline void StrideCopy(const DataType dtype, const void *in, const CoreDims &inStart,

From f5f68ba60197f39ae5bdb501f3af3c9e5ee66fd0 Mon Sep 17 00:00:00 2001
From: Norbert Podhorszki 
Date: Wed, 14 Feb 2024 13:37:02 -0500
Subject: [PATCH 19/19] Python: Add variable.set_stride() and
 variable.selection() functions, as well as bindings.Variable.SetStride() and
 bindings.Variable.Selection() functions. Add (single) stride value input to
 Gray-Scott's plotter.

---
 bindings/Python/py11Variable.cpp              | 26 +++++++++++++
 bindings/Python/py11Variable.h                |  4 ++
 bindings/Python/py11glue.cpp                  |  2 +
 .../simulations/gray-scott/plot/gsplot.py     | 37 +++++++++++++------
 python/adios2/file_reader.py                  |  1 +
 python/adios2/stream.py                       |  7 +++-
 python/adios2/variable.py                     | 25 +++++++++++--
 7 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/bindings/Python/py11Variable.cpp b/bindings/Python/py11Variable.cpp
index a7acfcd553..f63180870e 100644
--- a/bindings/Python/py11Variable.cpp
+++ b/bindings/Python/py11Variable.cpp
@@ -69,6 +69,32 @@ size_t Variable::SelectionSize() const
     return size;
 }
 
+Box Variable::Selection() const
+{
+    helper::CheckForNullptr(m_VariableBase, "in call to Variable::Selection");
+    const adios2::DataType typeCpp = m_VariableBase->m_Type;
+
+    if (typeCpp == adios2::DataType::Struct)
+    {
+        // not supported
+    }
+#define declare_template_instantiation(T)                                                          \
+    else if (typeCpp == adios2::helper::GetDataType())                                          \
+    {                                                                                              \
+        const adios2::core::Variable *variable =                                                \
+            dynamic_cast *>(m_VariableBase);                       \
+        return variable->Selection();                                                              \
+    }
+    ADIOS2_FOREACH_STDTYPE_1ARG(declare_template_instantiation)
+#undef declare_template_instantiation
+}
+
+void Variable::SetStride(const Dims &stride)
+{
+    helper::CheckForNullptr(m_VariableBase, "in call to Variable::SetStride");
+    return m_VariableBase->SetStride(stride);
+}
+
 size_t Variable::AddOperation(const Operator op, const Params ¶meters)
 {
     helper::CheckForNullptr(m_VariableBase, "in call to Variable::AddOperation");
diff --git a/bindings/Python/py11Variable.h b/bindings/Python/py11Variable.h
index b9ab9c1511..9ea6eadabf 100644
--- a/bindings/Python/py11Variable.h
+++ b/bindings/Python/py11Variable.h
@@ -47,6 +47,10 @@ class Variable
 
     size_t SelectionSize() const;
 
+    Box Selection() const;
+
+    void SetStride(const Dims &stride);
+
     std::string Name() const;
 
     std::string Type() const;
diff --git a/bindings/Python/py11glue.cpp b/bindings/Python/py11glue.cpp
index 90b12ba1e9..f91399b88a 100644
--- a/bindings/Python/py11glue.cpp
+++ b/bindings/Python/py11glue.cpp
@@ -313,6 +313,8 @@ PYBIND11_MODULE(ADIOS2_PYTHON_MODULE_NAME, m)
         .def("SetSelection", &adios2::py11::Variable::SetSelection)
         .def("SetStepSelection", &adios2::py11::Variable::SetStepSelection)
         .def("SelectionSize", &adios2::py11::Variable::SelectionSize)
+        .def("SetStride", &adios2::py11::Variable::SetStride)
+        .def("Selection", &adios2::py11::Variable::Selection)
         .def("Name", &adios2::py11::Variable::Name)
         .def("Type", &adios2::py11::Variable::Type)
         .def("Sizeof", &adios2::py11::Variable::Sizeof)
diff --git a/examples/simulations/gray-scott/plot/gsplot.py b/examples/simulations/gray-scott/plot/gsplot.py
index 5c1cd2d528..7ce7fa4c71 100644
--- a/examples/simulations/gray-scott/plot/gsplot.py
+++ b/examples/simulations/gray-scott/plot/gsplot.py
@@ -49,12 +49,19 @@ def SetupArgs():
         help="The 2D plane to be displayed/stored xy/yz/xz/all",
         default="yz",
     )
+    parser.add_argument(
+        "--stride",
+        "-stride",
+        help="Stride (same value for all dimensions)",
+        default="1",
+    )
     args = parser.parse_args()
 
     args.displaysec = float(args.displaysec)
     args.nx = int(args.nx)
     args.ny = int(args.ny)
     args.nz = int(args.nz)
+    args.stride = int(args.stride)
 
     if args.plane not in ("xz", "yz", "xy", "all"):
         raise ValueError("Input argument --plane must be one of xz/yz/xy/all")
@@ -113,9 +120,9 @@ def Plot2D(plane_direction, data, args, fullshape, step, fontsize):
     plt.clf()
 
 
-def read_data(args, fr, start_coord, size_dims):
+def read_data(args, fr, start_coord, size_dims, stride):
     var1 = args.varname
-    data = fr.read(var1, start_coord, size_dims)
+    data = fr.read(var1, start_coord, size_dims, stride=stride)
     data = np.squeeze(data)
     return data
 
@@ -153,12 +160,20 @@ def read_data(args, fr, start_coord, size_dims):
         #        print (vars_info)
         shape3_str = vars_info[args.varname]["Shape"].split(",")
         shape3 = list(map(int, shape3_str))
-        sim_step = fr_step.read("step")
+        stride = []
+        stridedshape = []
+        for i in range(len(shape3)):
+            stride.append(args.stride) 
+            stridedshape.append(int(shape3[i] / args.stride))
+
+        sim_step = fr_step.read("step")[0]
 
         if myrank == 0:
             print(
-                "GS Plot step {0} processing simulation output step {1} or computation"
-                "step {2}".format(plot_step, cur_step, sim_step),
+                f"GS Plot step {plot_step} processing simulation output step {cur_step}"
+                f" or computation step {sim_step}\n",
+                f"    variable  {args.varname} full shape {fullshape}"
+                f" stride {stride} strided shape {stridedshape}",
                 flush=True,
             )
         #            if cur_step == 0:
@@ -166,21 +181,21 @@ def read_data(args, fr, start_coord, size_dims):
 
         if args.plane in ("xy", "all"):
             data = read_data(
-                args, fr_step, [0, 0, int(shape3[2] / 2)], [shape3[0], shape3[1], 1]
+                args, fr_step, [0, 0, int(shape3[2] / 2)], [shape3[0], shape3[1], 1], stride
             )
-            Plot2D("xy", data, args, fullshape, sim_step, fontsize)
+            Plot2D("xy", data, args, stridedshape, sim_step, fontsize)
 
         if args.plane in ("xz", "all"):
             data = read_data(
-                args, fr_step, [0, int(shape3[1] / 2), 0], [shape3[0], 1, shape3[2]]
+                args, fr_step, [0, int(shape3[1] / 2), 0], [shape3[0], 1, shape3[2]], stride
             )
-            Plot2D("xz", data, args, fullshape, sim_step, fontsize)
+            Plot2D("xz", data, args, stridedshape, sim_step, fontsize)
 
         if args.plane in ("yz", "all"):
             data = read_data(
-                args, fr_step, [int(shape3[0] / 2), 0, 0], [1, shape3[1], shape3[2]]
+                args, fr_step, [int(shape3[0] / 2), 0, 0], [1, shape3[1], shape3[2]], stride
             )
-            Plot2D("yz", data, args, fullshape, sim_step, fontsize)
+            Plot2D("yz", data, args, stridedshape, sim_step, fontsize)
         plot_step = plot_step + 1
 
     fr.close()
diff --git a/python/adios2/file_reader.py b/python/adios2/file_reader.py
index ac40791a6f..ee351e7e26 100644
--- a/python/adios2/file_reader.py
+++ b/python/adios2/file_reader.py
@@ -5,6 +5,7 @@
 from functools import singledispatchmethod
 from adios2 import Stream, IO
 
+
 # pylint: disable=W0221
 class FileReader(Stream):
     """High level implementation of the FileReader class for read Random access mode"""
diff --git a/python/adios2/stream.py b/python/adios2/stream.py
index d737864290..1f55346039 100644
--- a/python/adios2/stream.py
+++ b/python/adios2/stream.py
@@ -311,7 +311,7 @@ def read(self, variable: Variable):
                 resulting array from selection
         """
         dtype = type_adios_to_numpy(variable.type())
-        count = variable.count()
+        count = variable.selection()[1]
         if count != []:
             # array
             # steps = variable.get_steps_from_step_selection()
@@ -341,7 +341,7 @@ def read(self, variable: Variable):
         return output
 
     @read.register(str)
-    def _(self, name: str, start=[], count=[], block_id=None, step_selection=None):
+    def _(self, name: str, start=[], count=[], block_id=None, step_selection=None, stride=None):
         """
         Random access read allowed to select steps,
         only valid with Stream Engines
@@ -386,6 +386,9 @@ def _(self, name: str, start=[], count=[], block_id=None, step_selection=None):
         if start != [] and count != []:
             variable.set_selection([start, count])
 
+        if stride:
+            variable.set_stride(stride)
+
         return self.read(variable)
 
     def write_attribute(self, name, content, variable_name="", separator="/"):
diff --git a/python/adios2/variable.py b/python/adios2/variable.py
index 865a550536..3971945013 100644
--- a/python/adios2/variable.py
+++ b/python/adios2/variable.py
@@ -38,7 +38,7 @@ def count(self):
         Current selected count for this variable.
 
         Returns:
-            int: Current selected count.
+            list (int): Current selected count.
         """
         return self.impl.Count()
 
@@ -51,6 +51,15 @@ def selection_size(self):
         """
         return self.impl.SelectionSize()
 
+    def selection(self):
+        """
+        Current selection for this variable after set_selection() and set_stride()
+
+        Returns:
+            list (int): Current selection
+        """
+        return self.impl.Selection()
+
     def set_block_selection(self, block_id):
         """
         Set BlockID for this variable.
@@ -88,6 +97,16 @@ def set_step_selection(self, step_selection):
         """
         self.impl.SetStepSelection(step_selection)
 
+    def set_stride(self, stride):
+        """
+        Set Stridin(dimensions) for this variable
+
+        Args:
+            stride (list): stride value for each dimension,
+                Each value must be at least 1.
+        """
+        self.impl.SetStride(stride)
+
     def shape(self, step=None):
         """
         Get the shape assigned to the given step for this variable.
@@ -131,10 +150,10 @@ def single_value(self):
 
     def sizeof(self):
         """
-        Size in bytes of the contents of the variable.
+        Size in bytes of the data type of the variable.
 
         Returns:
-            int: size in bytes of the contents.
+            int: size in bytes of the content data type.
         """
         return self.impl.Sizeof()