From 2504542f5d8003076fa33d3306d34a209bc8a4f6 Mon Sep 17 00:00:00 2001 From: Axel Huebl Date: Sun, 14 Feb 2021 19:34:00 -0800 Subject: [PATCH] CMake: Fix library type control (#930) This is a follow-up to #900, fixing that the project-level library type was not used in `add_library` calls. --- include/openPMD/IO/HDF5/HDF5Auxiliary.hpp | 20 ++++- src/IO/HDF5/HDF5Auxiliary.cpp | 90 ++++++++++++++++++++++- src/IO/HDF5/HDF5IOHandler.cpp | 27 +++++-- 3 files changed, 125 insertions(+), 12 deletions(-) diff --git a/include/openPMD/IO/HDF5/HDF5Auxiliary.hpp b/include/openPMD/IO/HDF5/HDF5Auxiliary.hpp index 2e70ae5a12..88481889d5 100644 --- a/include/openPMD/IO/HDF5/HDF5Auxiliary.hpp +++ b/include/openPMD/IO/HDF5/HDF5Auxiliary.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2021 Fabian Koller +/* Copyright 2017-2021 Fabian Koller, Felix Schmitt, Axel Huebl * * This file is part of openPMD-api. * @@ -34,7 +34,6 @@ namespace openPMD { -#if openPMD_HAVE_HDF5 struct GetH5DataType { std::unordered_map< std::string, hid_t > m_userTypes; @@ -54,5 +53,20 @@ namespace openPMD std::string concrete_h5_file_position(Writable* w); -#endif + /** Computes the chunk dimensions for a dataset. + * + * Chunk dimensions are selected to create chunks sizes between + * 64KByte and 4MB. Smaller chunk sizes are inefficient due to overhead, + * larger chunks do not map well to file system blocks and striding. + * + * Chunk dimensions are less or equal to dataset dimensions and do + * not need to be a factor of the respective dataset dimension. 
+     *
+     * @param[in] dims dimensions of dataset to get chunk dims for
+     * @param[in] typeSize size of each element in bytes
+     * @return array for resulting chunk dimensions
+     */
+    inline std::vector< hsize_t >
+    getOptimalChunkDims( std::vector< hsize_t > const dims,
+                         size_t const typeSize );
 
 } // namespace openPMD
diff --git a/src/IO/HDF5/HDF5Auxiliary.cpp b/src/IO/HDF5/HDF5Auxiliary.cpp
index 23af642f5f..1136c0486b 100644
--- a/src/IO/HDF5/HDF5Auxiliary.cpp
+++ b/src/IO/HDF5/HDF5Auxiliary.cpp
@@ -1,4 +1,4 @@
-/* Copyright 2017-2021 Fabian Koller, Axel Huebl
+/* Copyright 2017-2021 Fabian Koller, Felix Schmitt, Axel Huebl
  *
  * This file is part of openPMD-api.
  *
@@ -306,4 +306,92 @@ openPMD::concrete_h5_file_position(Writable* w)
     return auxiliary::replace_all(pos, "//", "/");
 }
+
+std::vector< hsize_t >
+openPMD::getOptimalChunkDims( std::vector< hsize_t > const dims,
+                              size_t const typeSize )
+{
+    auto const ndims = dims.size();
+    std::vector< hsize_t > chunk_dims( dims.size() );
+
+    // chunk sizes in KiByte
+    constexpr std::array< size_t, 7u > CHUNK_SIZES_KiB
+        {{4096u, 2048u, 1024u, 512u, 256u, 128u, 64u}};
+
+    size_t total_data_size = typeSize;
+    size_t max_chunk_size = typeSize;
+    size_t target_chunk_size = 0u;
+
+    // compute the order of dimensions (descending)
+    // large dataset dimensions should have larger chunk sizes
+    std::multimap< hsize_t, uint32_t > dims_order;
+    for (uint32_t i = 0; i < ndims; ++i)
+        dims_order.insert(std::make_pair(dims[i], i));
+
+    for (uint32_t i = 0; i < ndims; ++i)
+    {
+        // initial number of chunks per dimension
+        chunk_dims[i] = 1;
+
+        // try to make at least two chunks for each dimension
+        size_t half_dim = dims[i] / 2;
+
+        // compute sizes
+        max_chunk_size *= (half_dim > 0) ? half_dim : 1;
+        total_data_size *= dims[i];
+    }
+
+    // compute the target chunk size
+    for( auto const & chunk_size : CHUNK_SIZES_KiB )
+    {
+        target_chunk_size = chunk_size * 1024;
+        if (target_chunk_size <= max_chunk_size)
+            break;
+    }
+
+    size_t current_chunk_size = typeSize;
+    size_t last_chunk_diff = target_chunk_size;
+    std::multimap< hsize_t, uint32_t >::const_iterator current_index =
+        dims_order.begin();
+
+    while (current_chunk_size < target_chunk_size)
+    {
+        // test if increasing chunk size optimizes towards target chunk size
+        size_t chunk_diff = target_chunk_size - (current_chunk_size * 2u);
+        if (chunk_diff >= last_chunk_diff)
+            break;
+
+        // find next dimension to increase chunk size for
+        int can_increase_dim = 0;
+        for (uint32_t d = 0; d < ndims; ++d)
+        {
+            int current_dim = current_index->second;
+
+            // increasing chunk size possible
+            if (chunk_dims[current_dim] * 2 <= dims[current_dim])
+            {
+                chunk_dims[current_dim] *= 2;
+                current_chunk_size *= 2;
+                can_increase_dim = 1;
+            }
+
+            current_index++;
+            if (current_index == dims_order.end())
+                current_index = dims_order.begin();
+
+            if (can_increase_dim)
+                break;
+        }
+
+        // can not increase chunk size in any dimension
+        // we must use the current chunk sizes
+        if (!can_increase_dim)
+            break;
+
+        last_chunk_diff = chunk_diff;
+    }
+
+    return chunk_dims;
+}
+
 
 #endif
diff --git a/src/IO/HDF5/HDF5IOHandler.cpp b/src/IO/HDF5/HDF5IOHandler.cpp
index 4ef7c08384..2605eefd11 100644
--- a/src/IO/HDF5/HDF5IOHandler.cpp
+++ b/src/IO/HDF5/HDF5IOHandler.cpp
@@ -22,6 +22,7 @@
 #include "openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp"
 
 #if openPMD_HAVE_HDF5
+#   include "openPMD/Datatype.hpp"
#   include "openPMD/auxiliary/Filesystem.hpp"
 #   include "openPMD/auxiliary/StringManip.hpp"
 #   include "openPMD/backend/Attribute.hpp"
@@ -256,21 +257,30 @@ HDF5IOHandlerImpl::createDataset(Writable* writable,
         Attribute a(0);
         a.dtype = d;
         std::vector< hsize_t > dims;
-        for( auto const& val : parameters.extent )
+        std::uint64_t num_elements = 1u;
+        for( auto const&
val : parameters.extent ) { dims.push_back(static_cast< hsize_t >(val)); + num_elements *= val; + } hid_t space = H5Screate_simple(static_cast< int >(dims.size()), dims.data(), dims.data()); VERIFY(space >= 0, "[HDF5] Internal error: Failed to create dataspace during dataset creation"); - std::vector< hsize_t > chunkDims; - for( auto const& val : parameters.chunkSize ) - chunkDims.push_back(static_cast< hsize_t >(val)); - /* enable chunking on the created dataspace */ hid_t datasetCreationProperty = H5Pcreate(H5P_DATASET_CREATE); - herr_t status; - //status = H5Pset_chunk(datasetCreationProperty, chunkDims.size(), chunkDims.data()); - //VERIFY(status == 0, "[HDF5] Internal error: Failed to set chunk size during dataset creation"); + + if( num_elements != 0u ) + { + // get chunking dimensions + std::vector< hsize_t > chunk_dims = getOptimalChunkDims(dims, toBytes(d)); + + // TODO: allow overwrite with user-provided chunk size + //for( auto const& val : parameters.chunkSize ) + // chunk_dims.push_back(static_cast< hsize_t >(val)); + + herr_t status = H5Pset_chunk(datasetCreationProperty, chunk_dims.size(), chunk_dims.data()); + VERIFY(status == 0, "[HDF5] Internal error: Failed to set chunk size during dataset creation"); + } std::string const& compression = parameters.compression; if( !compression.empty() ) @@ -318,6 +328,7 @@ HDF5IOHandlerImpl::createDataset(Writable* writable, H5P_DEFAULT); VERIFY(group_id >= 0, "[HDF5] Internal error: Failed to create HDF5 group during dataset creation"); + herr_t status; status = H5Dclose(group_id); VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 dataset during dataset creation"); status = H5Tclose(datatype);