diff --git a/.gitignore b/.gitignore index 35a8d5ab87..ab5d48025c 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,11 @@ CMakeSettings.json # Python wheels stuff *.egg-info/ + +# CMake generated files +build/ +.idea/ +.vscode/ + +# redis dump files +*.rdb \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fa8697873..aaf9a18af7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,7 @@ adios_option(AWSSDK "Enable support for S3 compatible storage using AWS SDK' adios_option(Derived_Variable "Enable support for derived variables" OFF) adios_option(PIP "Enable support for pip packaging" OFF) adios_option(XRootD "Enable support for XRootD" AUTO) +adios_option(KVCACHE "Enable support for KVCache" AUTO) option(ADIOS2_LIBADIOS_MODE "Install only C/C++ library components" OFF) mark_as_advanced(ADIOS2_LIBADIOS_MODE) @@ -265,7 +266,7 @@ set(ADIOS2_CONFIG_OPTS DataMan DataSpaces HDF5 HDF5_VOL MHS SST Fortran MPI Python PIP Blosc2 BZip2 LIBPRESSIO MGARD MGARD_MDR PNG SZ ZFP DAOS IME O_DIRECT Sodium Catalyst SysVShMem UCX ZeroMQ Profiling Endian_Reverse Derived_Variable AWSSDK XRootD GPU_Support CUDA Kokkos - Kokkos_CUDA Kokkos_HIP Kokkos_SYCL Campaign + Kokkos_CUDA Kokkos_HIP Kokkos_SYCL Campaign KVCACHE ) GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS}) diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake index 84553d0de1..9e1958ce9d 100644 --- a/cmake/DetectOptions.cmake +++ b/cmake/DetectOptions.cmake @@ -606,6 +606,21 @@ elseif(ADIOS2_USE_Campaign) endif() endif() +# KVCache +if(ADIOS2_USE_Cache STREQUAL AUTO) + find_package(hiredis) + if (hiredis_FOUND) + message(STATUS "hiredis found. Turn on KVCache") + set(ADIOS2_HAVE_KVCACHE TRUE) + endif() +elseif(ADIOS2_USE_Cache) + find_package(hiredis REQUIRED) + if (hiredis_FOUND) + message(STATUS "hiredis found. Turn on KVCache") + set(ADIOS2_HAVE_KVCACHE TRUE) + endif() +endif() + # Multithreading find_package(Threads REQUIRED) diff --git a/scripts/build_scripts/build-adios2-kvcache.sh b/scripts/build_scripts/build-adios2-kvcache.sh new file mode 100644 index 0000000000..4d99867412 --- /dev/null +++ b/scripts/build_scripts/build-adios2-kvcache.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# This script is to build the remote server with cache enabled. +# Attention: hiredis cannot be installed by apt-get, since the default version is too old (libhiredis0.14 (= 0.14.1-2)). +# We need to build it from source code. You can also use the following scripts to install hiredis (v1.2.0). + +# sample usage: in project home directory: +# source scripts/build_scripts/build-adios2-kvcache.sh --build +# source scripts/build_scripts/build-adios2-kvcache.sh --start +# source scripts/build_scripts/build-adios2-kvcache.sh --stop + +if [ -z "${BUILD_DIR}" ] +then + BUILD_DIR="${PWD}"/build +fi + +if [ ! -d "${BUILD_DIR}" ] +then + mkdir -p "${BUILD_DIR}" +fi + +SW_DIR="${BUILD_DIR}"/sw +if [ ! -d "${SW_DIR}" ] +then + mkdir -p "${SW_DIR}" +fi + +build_cache() { + # redis - in-memory data structure store + redis_dir="${SW_DIR}"/redis-7.2.3 + if [ ! -d "${redis_dir}" ] + then + cd "${SW_DIR}" || exit + curl -L -o redis-7.2.3.tar.gz https://github.com/redis/redis/archive/refs/tags/7.2.3.tar.gz + tar -xzf redis-7.2.3.tar.gz + rm redis-7.2.3.tar.gz + cd "${redis_dir}" || exit + # cannot accleerate by 'make -j8'. It will cause error. + make + + # hiredis - C client library to connect Redis server + cd "${redis_dir}"/deps/hiredis || exit + mkdir build && cd build || exit + cmake .. -DCMAKE_INSTALL_PREFIX="${SW_DIR}"/hiredis + make -j32 + make install + fi + + cd "${BUILD_DIR}" || exit + cmake .. -DADIOS2_USE_Cache=ON \ + -DADIOS2_USE_Python=ON \ + -DCMAKE_PREFIX_PATH="${SW_DIR}/hiredis" \ + -DCMAKE_INSTALL_PREFIX="${SW_DIR}"/adios2 + + make -j32 + make install + cd "${BUILD_DIR}"/../ || exit + echo "Build completed." + unset BUILD_DIR + + export DoRemote=1 + export useKVCache=1 +} + +start_services() { + echo "Starting redis server and setting environment variables..." + export PYTHONPATH=${SW_DIR}/adios2/local/lib/python3.10/dist-packages/ + nohup "${SW_DIR}"/redis-7.2.3/src/redis-server > "${SW_DIR}"/redis_server.log 2>&1 & + nohup "${SW_DIR}"/adios2/bin/adios2_remote_server -v > "${SW_DIR}"/remote_server.log 2>&1 & + sleep 5 + nohup "${SW_DIR}"/redis-7.2.3/src/redis-cli monitor > "${SW_DIR}"/redis_monitor.log 2>&1 & + echo "Services started and environment variables set." +} + +# Function to stop services (optional, example purpose) +stop_services() { + echo "Stopping services..." + pkill -f redis-server + pkill -f redis-cli + pkill -f remote_server + unset DoRemote + unset useKVCache + unset PYTHONPATH + echo "Services stopped." +} + +# Parse command line options +while [[ "$1" != "" ]]; do + case "$1" in + -b | --build ) + build_cache + ;; + -c | --start ) + start_services + ;; + -s | --stop ) + stop_services + ;; + -h | --help ) + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " -b, --build Build the software with cache enabled" + echo " -c, --start Start redis server and set environment variables" + echo " -s, --stop Stop the services" + echo " -h, --help Display this help message" + ;; + * ) + echo "Invalid option: $1" + echo "Use -h or --help for usage information." + ;; + esac + shift +done diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index 3a14cd1c1d..6e810c300b 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -247,7 +247,12 @@ if (ADIOS2_HAVE_SST) target_link_libraries(adios2_core PRIVATE adios2::thirdparty::EVPath) add_subdirectory(toolkit/remote) endif() - + +if (ADIOS2_HAVE_KVCACHE) + target_sources(adios2_core PRIVATE toolkit/kvcache/KVCacheCommon.cpp) + target_link_libraries(adios2_core PRIVATE hiredis::hiredis) +endif () + if(ADIOS2_HAVE_Campaign) target_sources(adios2_core PRIVATE engine/campaign/CampaignReader.cpp diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp index 339cb16586..5ee9dff327 100644 --- a/source/adios2/engine/bp5/BP5Reader.cpp +++ b/source/adios2/engine/bp5/BP5Reader.cpp @@ -311,6 +311,12 @@ void BP5Reader::PerformGets() m_Remote->Open("localhost", EVPathRemoteCommon::ServerPort, RemoteName, m_OpenMode, RowMajorOrdering); } +#endif +#ifdef ADIOS2_HAVE_KVCACHE + if (getenv("useKVCache")) + { + m_KVCache.OpenConnection(); + } #endif if (m_Remote == nullptr) { @@ -330,7 +336,18 @@ void BP5Reader::PerformGets() if (m_Remote) { +#ifdef ADIOS2_HAVE_KVCACHE + if (getenv("useKVCache")) + { + PerformRemoteGetsWithKVCache(); + } + else + { + PerformRemoteGets(); + } +#else PerformRemoteGets(); +#endif } else { @@ -344,6 +361,154 @@ void BP5Reader::PerformGets() } } +void BP5Reader::PerformRemoteGetsWithKVCache() +{ + auto GetRequests = m_BP5Deserializer->PendingGetRequests; + std::vector handles; + + struct RequestInfo + { + size_t ReqSeq; + size_t TypeSize; + size_t ReqSize; + std::string CacheKey; + bool DirectCopy; + kvcache::QueryBox ReqBox; + void *Data; + + // Constructor to initialize Start and Count with DimCount + RequestInfo(size_t dimCount) : ReqBox(dimCount) {} + }; + std::vector remoteRequestsInfo; + std::vector cachedRequestsInfo; + + for (size_t req_seq = 0; req_seq < GetRequests.size(); req_seq++) + { + const auto &Req = GetRequests[req_seq]; + const DataType varType = m_IO.InquireVariableType(Req.VarName); + + RequestInfo ReqInfo(Req.Count.size()); + ReqInfo.ReqSeq = req_seq; + ReqInfo.TypeSize = helper::GetDataTypeSize(varType); + + kvcache::QueryBox targetBox(Req.Start, Req.Count); + std::string keyPrefix = + Req.VarName + std::to_string(Req.RelStep) + std::to_string(Req.BlockID); + std::string targetKey = keyPrefix + targetBox.toString(); + + // Exact Match: check if targetKey exists + if (m_KVCache.Exists(targetKey)) + { + ReqInfo.CacheKey = targetKey; + ReqInfo.DirectCopy = true; + ReqInfo.ReqSize = targetBox.size(); + cachedRequestsInfo.push_back(ReqInfo); + } + else + { + int max_depth = 999; + if (getenv("maxDepth")) + { + max_depth = std::stoi(getenv("maxDepth")); + } + + std::unordered_set samePrefixKeys; + std::vector regularBoxes; + std::vector cachedBoxes; + m_KVCache.KeyPrefixExistence(keyPrefix, samePrefixKeys); + + if (samePrefixKeys.size() > 0) + { + targetBox.GetMaxInteractBox(samePrefixKeys, max_depth, 0, regularBoxes, + cachedBoxes); + } + else + { + regularBoxes.push_back(targetBox); + } + + std::cout << "Going to retrieve " << regularBoxes.size() + << " boxes from remote server, and " << cachedBoxes.size() + << " boxes from cache" << std::endl; + + // Get data from remote server + for (auto &box : regularBoxes) + { + + ReqInfo.ReqSize = box.size(); + ReqInfo.CacheKey = keyPrefix + box.toString(); + ReqInfo.ReqBox = box; + ReqInfo.Data = malloc(ReqInfo.ReqSize * ReqInfo.TypeSize); + std::vector start; + std::vector count; + box.StartToVector(start); + box.CountToVector(count); + auto handle = m_Remote->Get(Req.VarName, Req.RelStep, Req.BlockID, count, start, + ReqInfo.Data); + handles.push_back(handle); + remoteRequestsInfo.push_back(ReqInfo); + } + + // Get data from cache + for (auto &box : cachedBoxes) + { + ReqInfo.CacheKey = keyPrefix + box.toString(); + ReqInfo.ReqBox = box; + ReqInfo.DirectCopy = false; + cachedRequestsInfo.push_back(ReqInfo); + } + } + } + + // Get data from cache server + for (auto &ReqInfo : cachedRequestsInfo) + { + m_KVCache.AppendCommandInBatch(ReqInfo.CacheKey.c_str(), 1, 0, nullptr); + } + + for (auto &ReqInfo : cachedRequestsInfo) + { + auto &Req = GetRequests[ReqInfo.ReqSeq]; + if (ReqInfo.DirectCopy) + { + m_KVCache.ExecuteBatch(ReqInfo.CacheKey.c_str(), 1, ReqInfo.ReqSize * ReqInfo.TypeSize, + Req.Data); + } + else + { + void *data = malloc(ReqInfo.ReqBox.size() * ReqInfo.TypeSize); + m_KVCache.ExecuteBatch(ReqInfo.CacheKey.c_str(), 1, + ReqInfo.ReqBox.size() * ReqInfo.TypeSize, data); + helper::NdCopy(reinterpret_cast(data), ReqInfo.ReqBox.Start, + ReqInfo.ReqBox.Count, true, false, reinterpret_cast(Req.Data), + Req.Start, Req.Count, true, false, static_cast(ReqInfo.TypeSize)); + free(data); + } + } + + for (size_t handle_seq = 0; handle_seq < handles.size(); handle_seq++) + { + auto handle = handles[handle_seq]; + m_Remote->WaitForGet(handle); + auto &ReqInfo = remoteRequestsInfo[handle_seq]; + auto &Req = GetRequests[ReqInfo.ReqSeq]; + helper::NdCopy(reinterpret_cast(ReqInfo.Data), ReqInfo.ReqBox.Start, + ReqInfo.ReqBox.Count, true, false, reinterpret_cast(Req.Data), + Req.Start, Req.Count, true, false, static_cast(ReqInfo.TypeSize)); + + m_KVCache.AppendCommandInBatch(ReqInfo.CacheKey.c_str(), 0, + ReqInfo.ReqSize * ReqInfo.TypeSize, ReqInfo.Data); + free(ReqInfo.Data); + } + + // Execute batch commands of Set + for (size_t handle_seq = 0; handle_seq < handles.size(); handle_seq++) + { + auto &ReqInfo = remoteRequestsInfo[handle_seq]; + m_KVCache.ExecuteBatch(ReqInfo.CacheKey.c_str(), 0, 0, nullptr); + } +} + void BP5Reader::PerformRemoteGets() { // TP startGenerate = NOW(); diff --git a/source/adios2/engine/bp5/BP5Reader.h b/source/adios2/engine/bp5/BP5Reader.h index a0418c5244..11357a875b 100644 --- a/source/adios2/engine/bp5/BP5Reader.h +++ b/source/adios2/engine/bp5/BP5Reader.h @@ -17,6 +17,7 @@ #include "adios2/helper/adiosRangeFilter.h" #include "adios2/toolkit/format/bp5/BP5Deserializer.h" #include "adios2/toolkit/format/buffer/heap/BufferMalloc.h" +#include "adios2/toolkit/kvcache/KVCacheCommon.h" #include "adios2/toolkit/remote/Remote.h" #include "adios2/toolkit/transportman/TransportMan.h" @@ -99,6 +100,9 @@ class BP5Reader : public BP5Engine, public Engine bool m_WriterIsActive = true; adios2::profiling::JSONProfiler m_JSONProfiler; + /* KVCache for remote data */ + kvcache::KVCacheCommon m_KVCache; + /** used for per-step reads, TODO: to be moved to BP5Deserializer */ size_t m_CurrentStep = 0; size_t m_StepsCount = 0; @@ -254,6 +258,8 @@ class BP5Reader : public BP5Engine, public Engine void PerformRemoteGets(); + void PerformRemoteGetsWithKVCache(); + void DestructorClose(bool Verbose) noexcept; /* Communicator connecting ranks on each Compute Node. diff --git a/source/adios2/toolkit/kvcache/KVCacheCommon.cpp b/source/adios2/toolkit/kvcache/KVCacheCommon.cpp new file mode 100644 index 0000000000..2dc094ce4f --- /dev/null +++ b/source/adios2/toolkit/kvcache/KVCacheCommon.cpp @@ -0,0 +1,131 @@ +// +// Created by cguo51 on 12/30/23. +// +#ifndef KVCACHECOMMON_CPP +#define KVCACHECOMMON_CPP + +#include "KVCacheCommon.h" + +namespace adios2 +{ +namespace kvcache +{ +void KVCacheCommon::OpenConnection(std::string host, int port) +{ + m_redisContext = redisConnect(host.c_str(), port); + if (m_redisContext == NULL || m_redisContext->err) + { + std::cout << "Error to connect to kvcache server: " << m_redisContext->errstr << std::endl; + if (m_redisContext) + { + redisFree(m_redisContext); + } + } + else + { + std::cout << "------------------------------------------------------------" << std::endl; + std::cout << "Connected to kvcache server. KV Cache Version Control: V1.0" << std::endl; + } +} + +void KVCacheCommon::CloseConnection() +{ + if (m_redisContext != nullptr) + { + m_redisContext = nullptr; + std::cout << "KVCache connection closed" << std::endl; + } +} + +void KVCacheCommon::Set(const char *key, size_t size, void *data) +{ + m_redisReply = (redisReply *)redisCommand(m_redisContext, "SET %s %b", key, data, size); + if (m_redisReply == NULL) + { + std::cout << "Error to set key: " << key << std::endl; + } + else + { + std::cout << "SET Key: " << key << " Value size: " << size << std::endl; + freeReplyObject(m_redisReply); + } +} + +void KVCacheCommon::Get(const char *key, size_t size, void *data) +{ + m_redisReply = (redisReply *)redisCommand(m_redisContext, "GET %s", key); + if (m_redisReply == NULL) + { + std::cout << "Error to get key: " << key << std::endl; + } + else + { + memcpy(data, m_redisReply->str, size); + freeReplyObject(m_redisReply); + } +} + +void KVCacheCommon::AppendCommandInBatch(const char *key, size_t mode, size_t size, void *data) +{ + if (mode == 0) + { + redisAppendCommand(m_redisContext, "SET %s %b", key, data, size); + } + else if (mode == 1) + { + redisAppendCommand(m_redisContext, "GET %s", key); + } +} + +void KVCacheCommon::ExecuteBatch(const char *key, size_t mode, size_t size, void *data) +{ + if (redisGetReply(m_redisContext, (void **)&m_redisReply) == REDIS_OK) + { + if (mode == 1) + { + memcpy(data, m_redisReply->str, size); + } + freeReplyObject(m_redisReply); + } + else + { + std::cout << "Error to execute batch command: " << key << std::endl; + } +} + +bool KVCacheCommon::Exists(std::string key) +{ + m_redisReply = (redisReply *)redisCommand(m_redisContext, "EXISTS %s", key.c_str()); + if (m_redisReply != NULL) + { + if (!m_redisReply->integer) + { + std::cout << "The Key: " << key << " does not exist" << std::endl; + return false; + } + freeReplyObject(m_redisReply); + return true; + } + return false; +} + +void KVCacheCommon::KeyPrefixExistence(const std::string &key_prefix, + std::unordered_set &keys) +{ + m_redisReply = (redisReply *)redisCommand(m_redisContext, "KEYS %s*", key_prefix.c_str()); + if (m_redisReply == NULL) + { + std::cout << "Error to get keys with prefix: " << key_prefix << std::endl; + } + else + { + for (int i = 0; i < m_redisReply->elements; i++) + { + keys.insert(m_redisReply->element[i]->str); + } + freeReplyObject(m_redisReply); + } +} +}; // namespace kvcache +}; // namespace adios2 +#endif // KVCACHECOMMON_CPP \ No newline at end of file diff --git a/source/adios2/toolkit/kvcache/KVCacheCommon.h b/source/adios2/toolkit/kvcache/KVCacheCommon.h new file mode 100644 index 0000000000..f17f25117a --- /dev/null +++ b/source/adios2/toolkit/kvcache/KVCacheCommon.h @@ -0,0 +1,64 @@ +// +// Created by cguo51 on 12/30/23. +// + +#ifndef ADIOS2_KVCACHECOMMON_H +#define ADIOS2_KVCACHECOMMON_H +#include "QueryBox.h" +#include // For memcpy +#include +#include + +#ifdef ADIOS2_HAVE_KVCACHE +#include +#endif + +namespace adios2 +{ + +namespace kvcache +{ + +class KVCacheCommon +{ +#ifdef ADIOS2_HAVE_KVCACHE +private: + redisContext *m_redisContext; + redisReply *m_redisReply; + +public: + ~KVCacheCommon() { CloseConnection(); } + + void OpenConnection(std::string host = "localhost", int port = 6379); + + void CloseConnection(); + + void Set(const char *key, size_t size, void *data); + + void Get(const char *key, size_t size, void *data); + + // Batch operations in pipeline, mode 0 for SET, 1 for GET + void AppendCommandInBatch(const char *key, size_t mode, size_t size, void *data); + + void ExecuteBatch(const char *key, size_t mode, size_t size, void *data); + + bool Exists(std::string key); + + void KeyPrefixExistence(const std::string &key_prefix, std::unordered_set &keys); +#else +public: + KVCacheCommon() = default; + ~KVCacheCommon() = default; + void OpenConnection(){}; + void CloseConnection(){}; + void AppendCommandInBatch(const char *key, size_t mode, size_t size, void *data){}; + void ExecuteBatch(const char *key, size_t mode, size_t size, void *data){}; + bool Exists(std::string key) { return false; }; + void KeyPrefixExistence(const std::string &key_prefix, std::unordered_set &keys){}; + +#endif /* ADIOS2_HAVE_KVCACHE */ +}; +}; // namespace kvcache +}; // adios2 + +#endif // ADIOS2_KVCACHECOMMON_H diff --git a/source/adios2/toolkit/kvcache/QueryBox.h b/source/adios2/toolkit/kvcache/QueryBox.h new file mode 100644 index 0000000000..860a391fdb --- /dev/null +++ b/source/adios2/toolkit/kvcache/QueryBox.h @@ -0,0 +1,314 @@ +// +// Created by cguo51 on 7/27/23. +// + +#ifndef ADIOS2_KVCACHE_QUERYBOX_H +#define ADIOS2_KVCACHE_QUERYBOX_H + +#include +#include +#include +#include +#include + +namespace adios2 +{ + +namespace kvcache +{ + +// QueryBox is a class to represent a box in a multi-dimensional space +class QueryBox +{ +public: + helper::DimsArray Start; + helper::DimsArray Count; + + explicit QueryBox(size_t dimCount) : Start(dimCount), Count(dimCount) + { + for (size_t i = 0; i < dimCount; ++i) + { + Start[i] = 0; + Count[i] = 0; + } + } + + QueryBox(const helper::DimsArray &start, const helper::DimsArray &count) + : Start(start), Count(count) + { + } + + QueryBox(const QueryBox &box) : Start(box.Start), Count(box.Count) {} + + // size + size_t size() const + { + size_t s = 1; + for (size_t i = 0; i < Count.size(); ++i) + { + s *= Count[i]; + } + return s; + } + + // ToString + std::string toString() const + { + std::string str = "|Start_"; + for (size_t i = 0; i < Start.size(); ++i) + { + str += std::to_string(Start[i]); + if (i != Start.size() - 1) + { + str += "_"; + } + } + str += "|Count_"; + for (size_t i = 0; i < Start.size(); ++i) + { + str += std::to_string(Count[i]); + if (i != Start.size() - 1) + { + str += "_"; + } + } + str += "|"; + return str; + } + + // determine if a box is equal to another box + bool operator==(const QueryBox &box) const + { + return std::strcmp(toString().c_str(), box.toString().c_str()) == 0; + } + + // Assignment operator + QueryBox &operator=(const QueryBox &box) + { + if (this == &box) + { + return *this; // handle self-assignment + } + + // Copy elements + for (size_t i = 0; i < box.Start.size(); ++i) + { + Start[i] = box.Start[i]; + Count[i] = box.Count[i]; + } + + return *this; + } + + // convert helper::DimsArray to std::vector + void StartToVector(std::vector &vec) const + { + for (size_t i = 0; i < Start.size(); ++i) + { + vec.push_back(Start[i]); + } + } + + void CountToVector(std::vector &vec) const + { + for (size_t i = 0; i < Count.size(); ++i) + { + vec.push_back(Count[i]); + } + } + + // check if *this is interacted in another box, return the new intersection box pointer + bool IsInteracted(const QueryBox &box, QueryBox &intersection) const + { + if (Start.size() != box.Start.size()) + { + return false; + } + + for (size_t i = 0; i < Start.size(); ++i) + { + if (Start[i] > box.Start[i] + box.Count[i] || box.Start[i] > Start[i] + Count[i]) + { + return false; + } + } + + for (size_t i = 0; i < Start.size(); ++i) + { + intersection.Start[i] = std::max(Start[i], box.Start[i]); + intersection.Count[i] = + std::min(Start[i] + Count[i], box.Start[i] + box.Count[i]) - intersection.Start[i]; + } + return true; + } + + // cut *this from big box, return a list of regular boxes + // Note: *this must be fully contained by big box + void NdCut(const QueryBox &bigBox, std::vector ®ularBoxes) + { + if (bigBox == *this) + { + return; + } + + // find the cut dimension with the biggest size + size_t maxCutDimSize = 0; + QueryBox maxCutDimBox(Start.size()); + for (size_t i = 0; i < Start.size(); ++i) + { + // if the start and count are the same, means no cut in this dimension, skip + if (Start[i] == bigBox.Start[i] && Count[i] == bigBox.Count[i]) + { + continue; + } + else + { + if (Start[i] != bigBox.Start[i]) + { + size_t cutDimDiff = Start[i] - bigBox.Start[i]; + size_t cutDimSize = bigBox.size() / bigBox.Count[i] * cutDimDiff; + if (cutDimSize > maxCutDimSize) + { + maxCutDimSize = cutDimSize; + maxCutDimBox = bigBox; + maxCutDimBox.Count[i] = cutDimDiff; + } + } + if (Start[i] + Count[i] != bigBox.Start[i] + bigBox.Count[i]) + { + size_t cutDimDiff = bigBox.Start[i] + bigBox.Count[i] - Start[i] - Count[i]; + size_t cutDimSize = bigBox.size() / bigBox.Count[i] * cutDimDiff; + if (cutDimSize > maxCutDimSize) + { + maxCutDimSize = cutDimSize; + maxCutDimBox = bigBox; + maxCutDimBox.Start[i] = Start[i] + Count[i]; + maxCutDimBox.Count[i] = cutDimDiff; + } + } + } + } + + // cut the max cut dimension + if (maxCutDimSize > 0) + { + regularBoxes.push_back(maxCutDimBox); + QueryBox bigBoxRemained(bigBox); + for (size_t i = 0; i < Start.size(); ++i) + { + if (maxCutDimBox.Start[i] == bigBox.Start[i] && + maxCutDimBox.Count[i] == bigBox.Count[i]) + { + continue; + } + else + { + if (maxCutDimBox.Start[i] == bigBox.Start[i]) + { + bigBoxRemained.Start[i] = maxCutDimBox.Start[i] + maxCutDimBox.Count[i]; + bigBoxRemained.Count[i] = + bigBox.Start[i] + bigBox.Count[i] - bigBoxRemained.Start[i]; + } + else + { + bigBoxRemained.Count[i] = maxCutDimBox.Start[i] - bigBox.Start[i]; + } + } + } + + NdCut(bigBoxRemained, regularBoxes); + } + } + + void GetMaxInteractBox(const std::unordered_set &samePrefixKeys, + const size_t &max_depth, size_t current_depth, + std::vector ®ularBoxes, std::vector &cachedBoxes) + { + // Lambda function to extract dimensions from key string + auto lf_ExtractDimensions = [](const std::string &key, const std::string &delimiter, + helper::DimsArray &data) { + size_t pos = key.find(delimiter); + if (pos == std::string::npos) + { + throw std::invalid_argument("Delimiter not found in key"); + } + size_t end = key.find("|", pos + delimiter.length()); + if (end == std::string::npos) + { + throw std::invalid_argument("End delimiter not found in key"); + } + std::string dimStr = + key.substr(pos + delimiter.length(), end - pos - delimiter.length()); + std::istringstream dimStream(dimStr); + std::string token; + size_t i = 0; + while (std::getline(dimStream, token, '_')) + { + data[i] = std::stoul(token); + i++; + } + }; + + if (current_depth > max_depth) + { + return; + } + current_depth++; + + QueryBox maxSourceBox(this->Start.size()); + QueryBox maxInteract(this->Start.size()); + for (auto &key : samePrefixKeys) + { + // Initialize the box from the key + size_t DimCount = std::count(key.begin(), key.end(), '_') / 2; + QueryBox box(DimCount); + lf_ExtractDimensions(key, "|Start_", box.Start); + lf_ExtractDimensions(key, "|Count_", box.Count); + + QueryBox intersection(this->Start.size()); + if (this->IsInteracted(box, intersection)) + { + if (maxInteract.size() < intersection.size()) + { + maxInteract = intersection; + maxSourceBox = box; + } + } + } + + if (maxInteract.size() == 0) + { + regularBoxes.push_back(*this); + // existing cache has no intersection with new request, return + return; + } + + cachedBoxes.push_back(maxSourceBox); + + // If the interaction of current box is equal to the new request, return, avoid cutting + if (this->size() == maxInteract.size()) + { + return; + } + + if (current_depth == max_depth) + { + maxInteract.NdCut(*this, regularBoxes); + } + else + { + std::vector nextBoxes; + maxInteract.NdCut(*this, nextBoxes); + for (auto &box : nextBoxes) + { + box.GetMaxInteractBox(samePrefixKeys, max_depth, current_depth, regularBoxes, + cachedBoxes); + } + } + } +}; + +} // namespace kvcache +} // namespace adios2 + +#endif // ADIOS2_KVCACHE_QUERYBOX_H