diff --git a/.docker/dev_base.Dockerfile b/.docker/dev_base.Dockerfile index fde596838..79378eeb2 100644 --- a/.docker/dev_base.Dockerfile +++ b/.docker/dev_base.Dockerfile @@ -4,13 +4,55 @@ ARG ARCH FROM ${ARCH}ubuntu:jammy -RUN echo "ARCH=${ARCH}" && sleep 3 - ARG ARCH_CODE -RUN echo "ARCH_CODE=${ARCH_CODE}" && sleep 3 +ENV JULIA_HOME=/opt/julia +ENV WORK_SPACE=/home/project +ENV LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric +ENV LIBFABRIC_DIR=$WORK_SPACE/install/libfabric +ENV MERCURY_SRC_DIR=$WORK_SPACE/source/mercury +ENV MERCURY_DIR=$WORK_SPACE/install/mercury + +ENV PDC_SRC_DIR=$WORK_SPACE/source/pdc +ENV PDC_DIR=$WORK_SPACE/install/pdc + +ENV LD_LIBRARY_PATH="$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH" +ENV PATH="$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH" +ENV LD_LIBRARY_PATH="$MERCURY_DIR/lib:$LD_LIBRARY_PATH" +ENV PATH="$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH" + + +ENV MERCURY_CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DNA_USE_OFI=ON -DNA_USE_SM=OFF -DNA_OFI_TESTING_PROTOCOL=tcp " +ENV CLANG_FORMAT_PATH=$WORK_SPACE/software/clang-format-lint-action/clang-format/clang-format10 + # Install necessary tools, MPICH, UUID library and developer files -RUN apt-get update && apt-get install -y \ +RUN echo "ARCH=${ARCH}" && echo "ARCH_CODE=${ARCH_CODE}" && sleep 3 && mkdir -p $WORK_SPACE && \ + mkdir -p $WORK_SPACE/software && \ + mkdir -p $WORK_SPACE/source && \ + mkdir -p $WORK_SPACE/install && \ + mkdir -p $LIBFABRIC_SRC_DIR && \ + mkdir -p $MERCURY_SRC_DIR && \ + mkdir -p $PDC_SRC_DIR && \ + mkdir -p $LIBFABRIC_DIR && \ + mkdir -p $MERCURY_DIR && \ + mkdir -p $PDC_DIR && \ + rm -rf $LIBFABRIC_SRC_DIR/* && \ + rm -rf $MERCURY_SRC_DIR/* && \ + rm -rf $PDC_SRC_DIR/* && \ + rm -rf $LIBFABRIC_DIR/* && \ + rm -rf $MERCURY_DIR/* && \ + rm -rf $PDC_DIR/* && \ + echo "export LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric" > $WORK_SPACE/pdc_env.sh && \ + echo "export 
LIBFABRIC_DIR=$WORK_SPACE/install/libfabric" >> $WORK_SPACE/pdc_env.sh && \ + echo "export MERCURY_SRC_DIR=$WORK_SPACE/source/mercury" >> $WORK_SPACE/pdc_env.sh && \ + echo "export MERCURY_DIR=$WORK_SPACE/install/mercury" >> $WORK_SPACE/pdc_env.sh && \ + echo "export PDC_SRC_DIR=$WORK_SPACE/source/pdc" >> $WORK_SPACE/pdc_env.sh && \ + echo "export PDC_DIR=$WORK_SPACE/install/pdc" >> $WORK_SPACE/pdc_env.sh && \ + echo 'export LD_LIBRARY_PATH=$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh && \ + echo 'export PATH=$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh && \ + echo 'export LD_LIBRARY_PATH=$MERCURY_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh \ + echo 'export PATH=$MERCURY_DIR/include:$MERCURY_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh && \ + apt-get update && apt-get install -y \ build-essential \ git \ mpich \ @@ -28,107 +70,47 @@ RUN apt-get update && apt-get install -y \ wget \ axel \ curl \ + bc \ vim \ nano \ gdb \ cgdb \ curl \ valgrind \ - python3 - -# Install Oh My Bash -RUN bash -c "$(curl -fsSL https://raw.githubusercontent.com/ohmybash/oh-my-bash/master/tools/install.sh)" && \ - sed -i 's/OSH_THEME="font"/OSH_THEME="powerline-multiline"/g' ~/.bashrc - -# Install Julia - -RUN echo "https://julialang-s3.julialang.org/bin/linux/aarch64/1.6/julia-1.6.7-linux-aarch64.tar.gz" > /julia_url_arm64v8.txt && \ - echo "https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.7-linux-x86_64.tar.gz" > /julia_url_amd64.txt - -RUN echo $(cat /julia_url_${ARCH_CODE}.txt) && sleep 3 - -RUN mkdir -p /opt/julia && wget -O - $(cat /julia_url_${ARCH_CODE}.txt) | tar -xz -C /opt/julia --strip-components=1 && \ - ln -s /opt/julia/bin/julia /usr/local/bin/julia - -RUN rm -rf /tmp/julia_url_*.txt - -ENV JULIA_HOME=/opt/julia - -# Install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y -RUN echo 'source $HOME/.cargo/env' >> ~/.bashrc - - -# Set WORK_SPACE environment variable and create 
necessary directories -ENV WORK_SPACE=/home/project -RUN mkdir -p $WORK_SPACE - -# Install clang-format repo -RUN mkdir -p $WORK_SPACE/software -RUN cd $WORK_SPACE/software && git clone https://github.com/DoozyX/clang-format-lint-action.git -ENV CLANG_FORMAT_PATH=$WORK_SPACE/software/clang-format-lint-action/clang-format/clang-format10 - -# Clone the repositories -WORKDIR $WORK_SPACE/source -RUN git clone https://github.com/ofiwg/libfabric.git && \ - git clone https://github.com/mercury-hpc/mercury.git --recursive - -ENV LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric -ENV LIBFABRIC_DIR=$WORK_SPACE/install/libfabric -ENV MERCURY_SRC_DIR=$WORK_SPACE/source/mercury -ENV MERCURY_DIR=$WORK_SPACE/install/mercury - -ENV PDC_SRC_DIR=$WORK_SPACE/source/pdc -ENV PDC_DIR=$WORK_SPACE/install/pdc - -RUN mkdir -p $LIBFABRIC_SRC_DIR && \ - mkdir -p $MERCURY_SRC_DIR && \ - mkdir -p $PDC_SRC_DIR && \ - mkdir -p $LIBFABRIC_DIR && \ - mkdir -p $MERCURY_DIR && \ - mkdir -p $PDC_DIR - - -# Save the environment variables to a file -RUN echo "export LIBFABRIC_SRC_DIR=$WORK_SPACE/source/libfabric" > $WORK_SPACE/pdc_env.sh && \ - echo "export LIBFABRIC_DIR=$WORK_SPACE/install/libfabric" >> $WORK_SPACE/pdc_env.sh && \ - echo "export MERCURY_SRC_DIR=$WORK_SPACE/source/mercury" >> $WORK_SPACE/pdc_env.sh && \ - echo "export MERCURY_DIR=$WORK_SPACE/install/mercury" >> $WORK_SPACE/pdc_env.sh && \ - echo "export PDC_SRC_DIR=$WORK_SPACE/source/pdc" >> $WORK_SPACE/pdc_env.sh && \ - echo "export PDC_DIR=$WORK_SPACE/install/pdc" >> $WORK_SPACE/pdc_env.sh - - -# Build and install libfabric -WORKDIR $LIBFABRIC_SRC_DIR -RUN git checkout v1.18.0 && \ + python3 && \ + cd $WORK_SPACE/software && \ + bash -c "$(curl -fsSL https://raw.githubusercontent.com/ohmybash/oh-my-bash/master/tools/install.sh)" && \ + sed -i 's/OSH_THEME="font"/OSH_THEME="powerline-multiline"/g' ~/.bashrc && \ + echo "https://julialang-s3.julialang.org/bin/linux/aarch64/1.6/julia-1.6.7-linux-aarch64.tar.gz" > 
$WORK_SPACE/software/julia_url_arm64v8.txt && \ + echo "https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.7-linux-x86_64.tar.gz" > $WORK_SPACE/software/julia_url_amd64.txt && \ + echo $(cat $WORK_SPACE/software/julia_url_${ARCH_CODE}.txt) && sleep 3 && \ + mkdir -p /opt/julia && wget -O - $(cat $WORK_SPACE/software/julia_url_${ARCH_CODE}.txt) | tar -xz -C /opt/julia --strip-components=1 && \ + ln -s /opt/julia/bin/julia /usr/local/bin/julia && \ + rm -rf $WORK_SPACE/software/julia_url_*.txt && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \ + echo 'source $HOME/.cargo/env' >> ~/.bashrc && \ + git clone https://github.com/DoozyX/clang-format-lint-action.git && \ + git clone https://github.com/ofiwg/libfabric.git ${LIBFABRIC_SRC_DIR} && \ + git clone https://github.com/mercury-hpc/mercury.git --recursive ${MERCURY_SRC_DIR} && \ + cd $LIBFABRIC_SRC_DIR && \ + git checkout v1.18.0 && \ ./autogen.sh && \ ./configure --prefix=$LIBFABRIC_DIR CC=mpicc CFLAG="-O2" && \ make clean && \ make -j 8 && make install && \ - make check - -ENV LD_LIBRARY_PATH="$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH" -ENV PATH="$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH" -RUN echo 'export LD_LIBRARY_PATH=$LIBFABRIC_DIR/lib:$LD_LIBRARY_PATH' >> $WORK_SPACE/pdc_env.sh && \ - echo 'export PATH=$LIBFABRIC_DIR/include:$LIBFABRIC_DIR/lib:$PATH' >> $WORK_SPACE/pdc_env.sh - - -# Build and install Mercury -WORKDIR $MERCURY_SRC_DIR -ENV MERCURY_CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$MERCURY_DIR -DCMAKE_C_COMPILER=mpicc -DBUILD_SHARED_LIBS=ON -DBUILD_TESTING=ON -DNA_USE_OFI=ON -DNA_USE_SM=OFF -DNA_OFI_TESTING_PROTOCOL=tcp " -RUN git checkout v2.2.0 \ - mkdir -p build -WORKDIR ${MERCURY_SRC_DIR}/build -RUN cmake $MERCURY_CMAKE_FLAGS ../ && \ + make check && \ + cd $MERCURY_SRC_DIR && \ + git checkout v2.2.0 && \ + mkdir -p build && \ + cd ${MERCURY_SRC_DIR}/build && \ + cmake $MERCURY_CMAKE_FLAGS ../ && \ make -j 16 && make install && \ - ctest - -# Set the 
# Build script for the metadata_json_loader executable and its bundled cJSON library.
cmake_minimum_required (VERSION 2.8.12)

# Setup cmake policies.
foreach(p
    CMP0012
    CMP0013
    CMP0014
    CMP0022 # CMake 2.8.12
    CMP0025 # CMake 3.0
    CMP0053 # CMake 3.1
    CMP0054 # CMake 3.1
    CMP0074 # CMake 3.12
    CMP0075 # CMake 3.12
    CMP0083 # CMake 3.14
    CMP0093 # CMake 3.15
    )
  if(POLICY ${p})
    cmake_policy(SET ${p} NEW)
  endif()
endforeach()

project(METADATA_JSON_LOADER C)

# Extra include directories / libraries collected from optional dependencies.
set(JMD_LDR_EXT_INCLUDE_DIRS "")
set(JMD_LDR_EXT_LIBRARIES "")

# Default to RelWithDebInfo, but only when no build type was requested.
# An unconditional FORCE would silently override -DCMAKE_BUILD_TYPE=... given
# on the command line (or chosen in a previous configure run).
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build." FORCE)
endif()

# set(JMD_DEBUG ON CACHE BOOL "Enable debug mode.")
option(JMD_DEBUG_MODE "Enable debug mode." ON)
if(JMD_DEBUG_MODE)
  add_definitions(-DJMD_DEBUG=1)
endif()

# When MPI is found, compile with the MPI wrappers and define JMD_LDR_ENABLE_MPI.
option(USE_SYSTEM_MPI "Use system-installed MPI." ON)
if(USE_SYSTEM_MPI)
  find_package(MPI)
  if(MPI_FOUND)
    add_definitions(-DJMD_LDR_ENABLE_MPI=1)
    # NOTE(review): overriding the compiler after project() is discouraged by
    # CMake; kept as-is to preserve the existing build behaviour.
    SET(CMAKE_C_COMPILER ${MPI_C_COMPILER})
    SET(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER})
    set(JMD_LDR_EXT_INCLUDE_DIRS ${MPI_C_INCLUDE_PATH}
      ${JMD_LDR_EXT_INCLUDE_DIRS}
      )
    set(JMD_LDR_EXT_LIBRARIES ${MPI_C_LIBRARIES} ${JMD_LDR_EXT_LIBRARIES})
  endif()
endif()

# PDC is mandatory; configuration stops here when it cannot be located.
find_package(PDC REQUIRED)
if(PDC_FOUND)
  #message(STATUS "PDC include directory: ${PDC_INCLUDE_DIR}")
  set(JMD_LDR_EXT_INCLUDE_DIRS ${PDC_INCLUDE_DIR}
    ${JMD_LDR_EXT_INCLUDE_DIRS}
    )
  set(JMD_LDR_EXT_LIBRARIES pdc ${JMD_LDR_EXT_LIBRARIES})
endif()


# Bundled JSON parser.
add_library(cjson cjson/cJSON.c)

add_executable(metadata_json_loader
  metadata_json_loader.c
  metadata_json_processor.h
  metadata_json_printer.c
  metadata_json_printer.h
  metadata_json_importer.c
  metadata_json_importer.h)
target_link_libraries(metadata_json_loader ${PDC_EXT_LIB_DEPENDENCIES} pdc cjson ${JMD_LDR_EXT_LIBRARIES})
target_include_directories(metadata_json_loader PUBLIC ${PDC_EXT_INCLUDE_DEPENDENCIES} ${JMD_LDR_EXT_INCLUDE_DIRS})
@@ -0,0 +1,2859 @@ +/* + Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* cJSON */ +/* JSON parser in C. 
/* Fallback definitions of isinf, isnan and NAN for ANSI C (C89).
 * In C99 and above these are already provided by math.h, in which case the
 * #ifndef guards leave the standard definitions untouched.
 *
 * Every macro argument and the whole NAN expansion are parenthesized so that
 * the macros stay correct inside larger expressions: without the parentheses
 * `x / NAN` expanded to `x / 0.0 / 0.0` (infinity, not NaN) and
 * `isnan(c ? a : b)` was parsed as `c ? a : (b != c ? a : b)`.
 *
 * Note: the fallbacks evaluate their argument more than once, so do not pass
 * expressions with side effects. */
#ifndef isinf
/* d is infinite when (d - d) is NaN although d itself is not NaN */
#define isinf(d) (isnan((d) - (d)) && !isnan(d))
#endif
#ifndef isnan
/* NaN is the only value that compares unequal to itself */
#define isnan(d) ((d) != (d))
#endif

#ifndef NAN
#ifdef _WIN32
#define NAN (sqrt(-1.0))
#else
#define NAN (0.0 / 0.0)
#endif
#endif
/* Compare two strings while ignoring ASCII case.
 *
 * Returns 0 when both strings are equal (or are the very same pointer),
 * otherwise the difference between the first pair of lower-cased characters
 * that differ. A NULL argument never compares equal: the result is 1 whenever
 * either pointer is NULL, even when both are. */
static int
case_insensitive_strcmp(const unsigned char *string1, const unsigned char *string2)
{
    const unsigned char *lhs = string1;
    const unsigned char *rhs = string2;

    if (!lhs || !rhs) {
        return 1;
    }

    if (lhs == rhs) {
        return 0;
    }

    for (;;) {
        const int lower_lhs = tolower(*lhs);
        const int lower_rhs = tolower(*rhs);

        if (lower_lhs != lower_rhs) {
            return lower_lhs - lower_rhs;
        }

        /* both characters match; a terminator here means both strings ended */
        if (*lhs == '\0') {
            return 0;
        }

        lhs++;
        rhs++;
    }
}
length); + + return copy; +} + +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks *hooks) +{ + if (hooks == NULL) { + /* Reset hooks */ + global_hooks.allocate = malloc; + global_hooks.deallocate = free; + global_hooks.reallocate = realloc; + return; + } + + global_hooks.allocate = malloc; + if (hooks->malloc_fn != NULL) { + global_hooks.allocate = hooks->malloc_fn; + } + + global_hooks.deallocate = free; + if (hooks->free_fn != NULL) { + global_hooks.deallocate = hooks->free_fn; + } + + /* use realloc only if both free and malloc are used */ + global_hooks.reallocate = NULL; + if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free)) { + global_hooks.reallocate = realloc; + } +} + +/* Internal constructor. */ +static cJSON * +cJSON_New_Item(const internal_hooks *const hooks) +{ + cJSON *node = (cJSON *)hooks->allocate(sizeof(cJSON)); + if (node) { + memset(node, '\0', sizeof(cJSON)); + } + + return node; +} + +/* Delete a cJSON structure. */ +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item) +{ + cJSON *next = NULL; + while (item != NULL) { + next = item->next; + if (!(item->type & cJSON_IsReference) && (item->child != NULL)) { + cJSON_Delete(item->child); + } + if (!(item->type & cJSON_IsReference) && (item->valuestring != NULL)) { + global_hooks.deallocate(item->valuestring); + } + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) { + global_hooks.deallocate(item->string); + } + global_hooks.deallocate(item); + item = next; + } +} + +/* get the decimal point character of the current locale */ +static unsigned char +get_decimal_point(void) +{ +#ifdef ENABLE_LOCALES + struct lconv *lconv = localeconv(); + return (unsigned char)lconv->decimal_point[0]; +#else + return '.'; +#endif +} + +typedef struct { + const unsigned char *content; + size_t length; + size_t offset; + size_t depth; /* How deeply nested (in arrays/objects) is the input at the current offset. 
/* Bounds-checking helpers for a parse buffer (any struct pointer exposing
 * `content`, `length` and `offset`). All arguments are fully parenthesized so
 * that compound expressions such as `flag ? a : b` can be passed safely;
 * `buffer` is evaluated more than once, so it must be free of side effects. */

/* check if the given size is left to read in a given parse buffer (starting with 1) */
#define can_read(buffer, size) (((buffer) != NULL) && (((buffer)->offset + (size)) <= (buffer)->length))
/* check if the buffer can be accessed at the given index (starting with 0) */
#define can_access_at_index(buffer, index)                                                                   \
    (((buffer) != NULL) && (((buffer)->offset + (index)) < (buffer)->length))
#define cannot_access_at_index(buffer, index) (!can_access_at_index(buffer, index))
/* get a pointer to the buffer at the position */
#define buffer_at_offset(buffer) ((buffer)->content + (buffer)->offset)
with the decimal point + * of the current locale (for strtod) + * This also takes care of '\0' not necessarily being available for marking the end of the input */ + for (i = 0; (i < (sizeof(number_c_string) - 1)) && can_access_at_index(input_buffer, i); i++) { + switch (buffer_at_offset(input_buffer)[i]) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + case 'e': + case 'E': + number_c_string[i] = buffer_at_offset(input_buffer)[i]; + break; + + case '.': + number_c_string[i] = decimal_point; + break; + + default: + goto loop_end; + } + } +loop_end: + number_c_string[i] = '\0'; + + number = strtod((const char *)number_c_string, (char **)&after_end); + if (number_c_string == after_end) { + return false; /* parse_error */ + } + + item->valuedouble = number; + + /* use saturation in case of overflow */ + if (number >= INT_MAX) { + item->valueint = INT_MAX; + } + else if (number <= (double)INT_MIN) { + item->valueint = INT_MIN; + } + else { + item->valueint = (int)number; + } + + item->type = cJSON_Number; + + input_buffer->offset += (size_t)(after_end - number_c_string); + return true; +} + +/* don't ask me, but the original cJSON_SetNumberValue returns an integer or double */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number) +{ + if (number >= INT_MAX) { + object->valueint = INT_MAX; + } + else if (number <= (double)INT_MIN) { + object->valueint = INT_MIN; + } + else { + object->valueint = (int)number; + } + + return object->valuedouble = number; +} + +CJSON_PUBLIC(char *) cJSON_SetValuestring(cJSON *object, const char *valuestring) +{ + char *copy = NULL; + /* if object's type is not cJSON_String or is cJSON_IsReference, it should not set valuestring */ + if ((object == NULL) || !(object->type & cJSON_String) || (object->type & cJSON_IsReference)) { + return NULL; + } + /* return NULL if the object is corrupted */ + if (object->valuestring == 
NULL) { + return NULL; + } + if (strlen(valuestring) <= strlen(object->valuestring)) { + strcpy(object->valuestring, valuestring); + return object->valuestring; + } + copy = (char *)cJSON_strdup((const unsigned char *)valuestring, &global_hooks); + if (copy == NULL) { + return NULL; + } + if (object->valuestring != NULL) { + cJSON_free(object->valuestring); + } + object->valuestring = copy; + + return copy; +} + +typedef struct { + unsigned char *buffer; + size_t length; + size_t offset; + size_t depth; /* current nesting depth (for formatted printing) */ + cJSON_bool noalloc; + cJSON_bool format; /* is this print a formatted print */ + internal_hooks hooks; +} printbuffer; + +/* realloc printbuffer if necessary to have at least "needed" bytes more */ +static unsigned char * +ensure(printbuffer *const p, size_t needed) +{ + unsigned char *newbuffer = NULL; + size_t newsize = 0; + + if ((p == NULL) || (p->buffer == NULL)) { + return NULL; + } + + if ((p->length > 0) && (p->offset >= p->length)) { + /* make sure that offset is valid */ + return NULL; + } + + if (needed > INT_MAX) { + /* sizes bigger than INT_MAX are currently not supported */ + return NULL; + } + + needed += p->offset + 1; + if (needed <= p->length) { + return p->buffer + p->offset; + } + + if (p->noalloc) { + return NULL; + } + + /* calculate new buffer size */ + if (needed > (INT_MAX / 2)) { + /* overflow of int, use INT_MAX if possible */ + if (needed <= INT_MAX) { + newsize = INT_MAX; + } + else { + return NULL; + } + } + else { + newsize = needed * 2; + } + + if (p->hooks.reallocate != NULL) { + /* reallocate with realloc if available */ + newbuffer = (unsigned char *)p->hooks.reallocate(p->buffer, newsize); + if (newbuffer == NULL) { + p->hooks.deallocate(p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + } + else { + /* otherwise reallocate manually */ + newbuffer = (unsigned char *)p->hooks.allocate(newsize); + if (!newbuffer) { + p->hooks.deallocate(p->buffer); + 
p->length = 0; + p->buffer = NULL; + + return NULL; + } + + memcpy(newbuffer, p->buffer, p->offset + 1); + p->hooks.deallocate(p->buffer); + } + p->length = newsize; + p->buffer = newbuffer; + + return newbuffer + p->offset; +} + +/* calculate the new length of the string in a printbuffer and update the offset */ +static void +update_offset(printbuffer *const buffer) +{ + const unsigned char *buffer_pointer = NULL; + if ((buffer == NULL) || (buffer->buffer == NULL)) { + return; + } + buffer_pointer = buffer->buffer + buffer->offset; + + buffer->offset += strlen((const char *)buffer_pointer); +} + +/* securely comparison of floating-point variables */ +static cJSON_bool +compare_double(double a, double b) +{ + double maxVal = fabs(a) > fabs(b) ? fabs(a) : fabs(b); + return (fabs(a - b) <= maxVal * DBL_EPSILON); +} + +/* Render the number nicely from the given item into a string. */ +static cJSON_bool +print_number(const cJSON *const item, printbuffer *const output_buffer) +{ + unsigned char *output_pointer = NULL; + double d = item->valuedouble; + int length = 0; + size_t i = 0; + unsigned char number_buffer[26] = {0}; /* temporary buffer to print the number into */ + unsigned char decimal_point = get_decimal_point(); + double test = 0.0; + + if (output_buffer == NULL) { + return false; + } + + /* This checks for NaN and Infinity */ + if (isnan(d) || isinf(d)) { + length = sprintf((char *)number_buffer, "null"); + } + else if (d == (double)item->valueint) { + length = sprintf((char *)number_buffer, "%d", item->valueint); + } + else { + /* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */ + length = sprintf((char *)number_buffer, "%1.15g", d); + + /* Check whether the original double can be recovered */ + if ((sscanf((char *)number_buffer, "%lg", &test) != 1) || !compare_double((double)test, d)) { + /* If not, print with 17 decimal places of precision */ + length = sprintf((char *)number_buffer, "%1.17g", d); + } + } + + /* sprintf failed 
/* Decode exactly four hexadecimal digits starting at input.
 * Returns the decoded value, or 0 as soon as a character is not a hex digit
 * (indistinguishable from a literal "0000"). */
static unsigned
parse_hex4(const unsigned char *const input)
{
    unsigned int value       = 0;
    size_t       digit_index = 0;

    while (digit_index < 4) {
        const unsigned char ch     = input[digit_index];
        unsigned int        nibble = 0;

        if ((ch >= '0') && (ch <= '9')) {
            nibble = (unsigned int)ch - '0';
        }
        else if ((ch >= 'A') && (ch <= 'F')) {
            nibble = (unsigned int)10 + ch - 'A';
        }
        else if ((ch >= 'a') && (ch <= 'f')) {
            nibble = (unsigned int)10 + ch - 'a';
        }
        else {
            /* invalid */
            return 0;
        }

        /* make room for the new nibble, then append it */
        value = (value << 4) + nibble;
        digit_index++;
    }

    return value;
}

/* Convert a UTF-16 escape (one \uXXXX sequence, or a \uXXXX\uXXXX surrogate
 * pair) into UTF-8.
 *
 * input_pointer  - points at the backslash of the first sequence
 * input_end      - one past the last readable input byte
 * output_pointer - in/out: where the UTF-8 bytes are written; advanced by the
 *                  number of bytes produced (1 to 4)
 *
 * Returns the number of input bytes consumed (6 or 12), or 0 on failure:
 * truncated input, a lone low surrogate, or a high surrogate that is not
 * followed by a valid low surrogate. On failure nothing is written. */
static unsigned char
utf16_literal_to_utf8(const unsigned char *const input_pointer, const unsigned char *const input_end,
                      unsigned char **output_pointer)
{
    long unsigned int scalar         = 0;
    unsigned int      high_unit      = 0;
    unsigned char     consumed       = 0;
    unsigned char     encoded_length = 0;
    unsigned char     lead_marker    = 0;
    unsigned char     slot           = 0;

    if ((input_end - input_pointer) < 6) {
        /* input ends unexpectedly */
        return 0;
    }

    high_unit = parse_hex4(input_pointer + 2);

    /* a low surrogate must never come first */
    if ((high_unit >= 0xDC00) && (high_unit <= 0xDFFF)) {
        return 0;
    }

    if ((high_unit >= 0xD800) && (high_unit <= 0xDBFF)) {
        /* high surrogate: a second \uXXXX holding the low surrogate must follow */
        const unsigned char *const low_sequence = input_pointer + 6;
        unsigned int               low_unit     = 0;

        if ((input_end - low_sequence) < 6) {
            /* input ends unexpectedly */
            return 0;
        }

        if ((low_sequence[0] != '\\') || (low_sequence[1] != 'u')) {
            /* missing second half of the surrogate pair */
            return 0;
        }

        low_unit = parse_hex4(low_sequence + 2);
        if ((low_unit < 0xDC00) || (low_unit > 0xDFFF)) {
            /* invalid second half of the surrogate pair */
            return 0;
        }

        consumed = 12; /* \uXXXX\uXXXX */
        scalar   = 0x10000 + (((high_unit & 0x3FF) << 10) | (low_unit & 0x3FF));
    }
    else {
        consumed = 6; /* \uXXXX */
        scalar   = high_unit;
    }

    /* pick the UTF-8 length; at most 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if (scalar < 0x80) {
        encoded_length = 1; /* 0xxxxxxx */
    }
    else if (scalar < 0x800) {
        encoded_length = 2; /* 110xxxxx 10xxxxxx */
        lead_marker    = 0xC0;
    }
    else if (scalar < 0x10000) {
        encoded_length = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
        lead_marker    = 0xE0;
    }
    else if (scalar <= 0x10FFFF) {
        encoded_length = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        lead_marker    = 0xF0;
    }
    else {
        /* invalid unicode codepoint */
        return 0;
    }

    /* fill the continuation bytes (10xxxxxx) from the last one backwards */
    slot = (unsigned char)(encoded_length - 1);
    while (slot > 0) {
        (*output_pointer)[slot] = (unsigned char)((scalar | 0x80) & 0xBF);
        scalar >>= 6;
        slot--;
    }

    /* the remaining high bits go into the leading byte */
    if (encoded_length > 1) {
        (*output_pointer)[0] = (unsigned char)((scalar | lead_marker) & 0xFF);
    }
    else {
        (*output_pointer)[0] = (unsigned char)(scalar & 0x7F);
    }

    *output_pointer += encoded_length;

    return consumed;
}
unsigned char sequence_length = 2; + if ((input_end - input_pointer) < 1) { + goto fail; + } + + switch (input_pointer[1]) { + case 'b': + *output_pointer++ = '\b'; + break; + case 'f': + *output_pointer++ = '\f'; + break; + case 'n': + *output_pointer++ = '\n'; + break; + case 'r': + *output_pointer++ = '\r'; + break; + case 't': + *output_pointer++ = '\t'; + break; + case '\"': + case '\\': + case '/': + *output_pointer++ = input_pointer[1]; + break; + + /* UTF-16 literal */ + case 'u': + sequence_length = utf16_literal_to_utf8(input_pointer, input_end, &output_pointer); + if (sequence_length == 0) { + /* failed to convert UTF16-literal to UTF-8 */ + goto fail; + } + break; + + default: + goto fail; + } + input_pointer += sequence_length; + } + } + + /* zero terminate the output */ + *output_pointer = '\0'; + + item->type = cJSON_String; + item->valuestring = (char *)output; + + input_buffer->offset = (size_t)(input_end - input_buffer->content); + input_buffer->offset++; + + return true; + +fail: + if (output != NULL) { + input_buffer->hooks.deallocate(output); + } + + if (input_pointer != NULL) { + input_buffer->offset = (size_t)(input_pointer - input_buffer->content); + } + + return false; +} + +/* Render the cstring provided to an escaped version that can be printed. 
*/ +static cJSON_bool +print_string_ptr(const unsigned char *const input, printbuffer *const output_buffer) +{ + const unsigned char *input_pointer = NULL; + unsigned char * output = NULL; + unsigned char * output_pointer = NULL; + size_t output_length = 0; + /* numbers of additional characters needed for escaping */ + size_t escape_characters = 0; + + if (output_buffer == NULL) { + return false; + } + + /* empty string */ + if (input == NULL) { + output = ensure(output_buffer, sizeof("\"\"")); + if (output == NULL) { + return false; + } + strcpy((char *)output, "\"\""); + + return true; + } + + /* set "flag" to 1 if something needs to be escaped */ + for (input_pointer = input; *input_pointer; input_pointer++) { + switch (*input_pointer) { + case '\"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + /* one character escape sequence */ + escape_characters++; + break; + default: + if (*input_pointer < 32) { + /* UTF-16 escape sequence uXXXX */ + escape_characters += 5; + } + break; + } + } + output_length = (size_t)(input_pointer - input) + escape_characters; + + output = ensure(output_buffer, output_length + sizeof("\"\"")); + if (output == NULL) { + return false; + } + + /* no characters have to be escaped */ + if (escape_characters == 0) { + output[0] = '\"'; + memcpy(output + 1, input, output_length); + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; + } + + output[0] = '\"'; + output_pointer = output + 1; + /* copy the string */ + for (input_pointer = input; *input_pointer != '\0'; (void)input_pointer++, output_pointer++) { + if ((*input_pointer > 31) && (*input_pointer != '\"') && (*input_pointer != '\\')) { + /* normal character, copy */ + *output_pointer = *input_pointer; + } + else { + /* character needs to be escaped */ + *output_pointer++ = '\\'; + switch (*input_pointer) { + case '\\': + *output_pointer = '\\'; + break; + case '\"': + *output_pointer = '\"'; + break; + case '\b': + 
*output_pointer = 'b'; + break; + case '\f': + *output_pointer = 'f'; + break; + case '\n': + *output_pointer = 'n'; + break; + case '\r': + *output_pointer = 'r'; + break; + case '\t': + *output_pointer = 't'; + break; + default: + /* escape and print as unicode codepoint */ + sprintf((char *)output_pointer, "u%04x", *input_pointer); + output_pointer += 4; + break; + } + } + } + output[output_length + 1] = '\"'; + output[output_length + 2] = '\0'; + + return true; +} + +/* Invoke print_string_ptr (which is useful) on an item. */ +static cJSON_bool +print_string(const cJSON *const item, printbuffer *const p) +{ + return print_string_ptr((unsigned char *)item->valuestring, p); +} + +/* Predeclare these prototypes. */ +static cJSON_bool parse_value(cJSON *const item, parse_buffer *const input_buffer); +static cJSON_bool print_value(const cJSON *const item, printbuffer *const output_buffer); +static cJSON_bool parse_array(cJSON *const item, parse_buffer *const input_buffer); +static cJSON_bool print_array(const cJSON *const item, printbuffer *const output_buffer); +static cJSON_bool parse_object(cJSON *const item, parse_buffer *const input_buffer); +static cJSON_bool print_object(const cJSON *const item, printbuffer *const output_buffer); + +/* Utility to jump whitespace and cr/lf */ +static parse_buffer * +buffer_skip_whitespace(parse_buffer *const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL)) { + return NULL; + } + + if (cannot_access_at_index(buffer, 0)) { + return buffer; + } + + while (can_access_at_index(buffer, 0) && (buffer_at_offset(buffer)[0] <= 32)) { + buffer->offset++; + } + + if (buffer->offset == buffer->length) { + buffer->offset--; + } + + return buffer; +} + +/* skip the UTF-8 BOM (byte order mark) if it is at the beginning of a buffer */ +static parse_buffer * +skip_utf8_bom(parse_buffer *const buffer) +{ + if ((buffer == NULL) || (buffer->content == NULL) || (buffer->offset != 0)) { + return NULL; + } + + if 
(can_access_at_index(buffer, 4) && + (strncmp((const char *)buffer_at_offset(buffer), "\xEF\xBB\xBF", 3) == 0)) { + buffer->offset += 3; + } + + return buffer; +} + +CJSON_PUBLIC(cJSON *) +cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated) +{ + size_t buffer_length; + + if (NULL == value) { + return NULL; + } + + /* Adding null character size due to require_null_terminated. */ + buffer_length = strlen(value) + sizeof(""); + + return cJSON_ParseWithLengthOpts(value, buffer_length, return_parse_end, require_null_terminated); +} + +/* Parse an object - create a new root, and populate. */ +CJSON_PUBLIC(cJSON *) +cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, + cJSON_bool require_null_terminated) +{ + parse_buffer buffer = {0, 0, 0, 0, {0, 0, 0}}; + cJSON * item = NULL; + + /* reset error position */ + global_error.json = NULL; + global_error.position = 0; + + if (value == NULL || 0 == buffer_length) { + goto fail; + } + + buffer.content = (const unsigned char *)value; + buffer.length = buffer_length; + buffer.offset = 0; + buffer.hooks = global_hooks; + + item = cJSON_New_Item(&global_hooks); + if (item == NULL) /* memory fail */ + { + goto fail; + } + + if (!parse_value(item, buffer_skip_whitespace(skip_utf8_bom(&buffer)))) { + /* parse failure. ep is set. 
*/ + goto fail; + } + + /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator + */ + if (require_null_terminated) { + buffer_skip_whitespace(&buffer); + if ((buffer.offset >= buffer.length) || buffer_at_offset(&buffer)[0] != '\0') { + goto fail; + } + } + if (return_parse_end) { + *return_parse_end = (const char *)buffer_at_offset(&buffer); + } + + return item; + +fail: + if (item != NULL) { + cJSON_Delete(item); + } + + if (value != NULL) { + error local_error; + local_error.json = (const unsigned char *)value; + local_error.position = 0; + + if (buffer.offset < buffer.length) { + local_error.position = buffer.offset; + } + else if (buffer.length > 0) { + local_error.position = buffer.length - 1; + } + + if (return_parse_end != NULL) { + *return_parse_end = (const char *)local_error.json + local_error.position; + } + + global_error = local_error; + } + + return NULL; +} + +/* Default options for cJSON_Parse */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value) +{ + return cJSON_ParseWithOpts(value, 0, 0); +} + +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLength(const char *value, size_t buffer_length) +{ + return cJSON_ParseWithLengthOpts(value, buffer_length, 0, 0); +} + +#define cjson_min(a, b) (((a) < (b)) ? 
(a) : (b)) + +static unsigned char * +print(const cJSON *const item, cJSON_bool format, const internal_hooks *const hooks) +{ + static const size_t default_buffer_size = 256; + printbuffer buffer[1]; + unsigned char * printed = NULL; + + memset(buffer, 0, sizeof(buffer)); + + /* create buffer */ + buffer->buffer = (unsigned char *)hooks->allocate(default_buffer_size); + buffer->length = default_buffer_size; + buffer->format = format; + buffer->hooks = *hooks; + if (buffer->buffer == NULL) { + goto fail; + } + + /* print the value */ + if (!print_value(item, buffer)) { + goto fail; + } + update_offset(buffer); + + /* check if reallocate is available */ + if (hooks->reallocate != NULL) { + printed = (unsigned char *)hooks->reallocate(buffer->buffer, buffer->offset + 1); + if (printed == NULL) { + goto fail; + } + buffer->buffer = NULL; + } + else /* otherwise copy the JSON over to a new buffer */ + { + printed = (unsigned char *)hooks->allocate(buffer->offset + 1); + if (printed == NULL) { + goto fail; + } + memcpy(printed, buffer->buffer, cjson_min(buffer->length, buffer->offset + 1)); + printed[buffer->offset] = '\0'; /* just to be sure */ + + /* free the buffer */ + hooks->deallocate(buffer->buffer); + } + + return printed; + +fail: + if (buffer->buffer != NULL) { + hooks->deallocate(buffer->buffer); + } + + if (printed != NULL) { + hooks->deallocate(printed); + } + + return NULL; +} + +/* Render a cJSON item/entity/structure to text. 
*/ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item) +{ + return (char *)print(item, true, &global_hooks); +} + +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item) +{ + return (char *)print(item, false, &global_hooks); +} + +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt) +{ + printbuffer p = {0, 0, 0, 0, 0, 0, {0, 0, 0}}; + + if (prebuffer < 0) { + return NULL; + } + + p.buffer = (unsigned char *)global_hooks.allocate((size_t)prebuffer); + if (!p.buffer) { + return NULL; + } + + p.length = (size_t)prebuffer; + p.offset = 0; + p.noalloc = false; + p.format = fmt; + p.hooks = global_hooks; + + if (!print_value(item, &p)) { + global_hooks.deallocate(p.buffer); + return NULL; + } + + return (char *)p.buffer; +} + +CJSON_PUBLIC(cJSON_bool) +cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format) +{ + printbuffer p = {0, 0, 0, 0, 0, 0, {0, 0, 0}}; + + if ((length < 0) || (buffer == NULL)) { + return false; + } + + p.buffer = (unsigned char *)buffer; + p.length = (size_t)length; + p.offset = 0; + p.noalloc = true; + p.format = format; + p.hooks = global_hooks; + + return print_value(item, &p); +} + +/* Parser core - when encountering text, process appropriately. 
*/
/*
 * Parse one JSON value starting at the current offset of `input_buffer`.
 *
 * The literals null / true / false are recognised here: the item type is set
 * (and valueint = 1 for true) and the offset is advanced past the literal.
 * Everything else is dispatched on the first character to parse_string(),
 * parse_number(), parse_array() or parse_object().
 *
 * Returns false when there is no input or nothing matches.
 */
static cJSON_bool
parse_value(cJSON *const item, parse_buffer *const input_buffer)
{
    if (input_buffer == NULL) {
        return false; /* no input */
    }
    if (input_buffer->content == NULL) {
        return false; /* no input */
    }

    /* four-character literals */
    if (can_read(input_buffer, 4)) {
        const char *text = (const char *)buffer_at_offset(input_buffer);

        if (strncmp(text, "null", 4) == 0) {
            item->type = cJSON_NULL;
            input_buffer->offset += 4;
            return true;
        }
        if (strncmp(text, "true", 4) == 0) {
            item->type     = cJSON_True;
            item->valueint = 1;
            input_buffer->offset += 4;
            return true;
        }
    }

    /* five-character literal */
    if (can_read(input_buffer, 5) &&
        (strncmp((const char *)buffer_at_offset(input_buffer), "false", 5) == 0)) {
        item->type = cJSON_False;
        input_buffer->offset += 5;
        return true;
    }

    /* everything else is decided by a single readable character */
    if (!can_access_at_index(input_buffer, 0)) {
        return false;
    }

    switch (buffer_at_offset(input_buffer)[0]) {
        case '\"':
            return parse_string(item, input_buffer);

        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return parse_number(item, input_buffer);

        case '[':
            return parse_array(item, input_buffer);

        case '{':
            return parse_object(item, input_buffer);

        default:
            return false;
    }
}

/* Render a value to text.
*/
/*
 * Render `item` into `output_buffer`.
 *
 * Only the low byte of item->type selects the rendering; any other bits set
 * in the type are ignored here. null / false / true are written as literals,
 * raw values are copied verbatim including their terminator, and the
 * remaining types are delegated to their print_* helpers.
 *
 * Returns false for NULL arguments, an unknown type, a raw item without a
 * valuestring, or when the buffer cannot provide the requested space.
 */
static cJSON_bool
print_value(const cJSON *const item, printbuffer *const output_buffer)
{
    const char *   literal     = NULL;
    unsigned char *destination = NULL;

    if ((item == NULL) || (output_buffer == NULL)) {
        return false;
    }

    switch ((item->type) & 0xFF) {
        case cJSON_Number:
            return print_number(item, output_buffer);

        case cJSON_String:
            return print_string(item, output_buffer);

        case cJSON_Array:
            return print_array(item, output_buffer);

        case cJSON_Object:
            return print_object(item, output_buffer);

        case cJSON_Raw: {
            size_t raw_size = 0;

            if (item->valuestring == NULL) {
                return false;
            }

            /* size includes the terminating '\0' */
            raw_size    = strlen(item->valuestring) + sizeof("");
            destination = ensure(output_buffer, raw_size);
            if (destination == NULL) {
                return false;
            }
            memcpy(destination, item->valuestring, raw_size);
            return true;
        }

        case cJSON_NULL:
            literal = "null";
            break;

        case cJSON_False:
            literal = "false";
            break;

        case cJSON_True:
            literal = "true";
            break;

        default:
            return false;
    }

    /* shared path for the three fixed literals: text plus terminator */
    destination = ensure(output_buffer, strlen(literal) + sizeof(""));
    if (destination == NULL) {
        return false;
    }
    strcpy((char *)destination, literal);

    return true;
}

/* Build an array from input text.
*/ +static cJSON_bool +parse_array(cJSON *const item, parse_buffer *const input_buffer) +{ + cJSON *head = NULL; /* head of the linked list */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (buffer_at_offset(input_buffer)[0] != '[') { + /* not an array */ + goto fail; + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ']')) { + /* empty array */ + goto success; + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) { + /* start the linked list */ + current_item = head = new_item; + } + else { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse next value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, input_buffer)) { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || buffer_at_offset(input_buffer)[0] != ']') { + goto fail; /* expected end of array */ + } + +success: + input_buffer->depth--; + + if (head != NULL) { + head->prev = current_item; + } + + item->type = cJSON_Array; + item->child = head; + + input_buffer->offset++; + + return true; + +fail: + if (head != NULL) { + 
cJSON_Delete(head); + } + + return false; +} + +/* Render an array to text */ +static cJSON_bool +print_array(const cJSON *const item, printbuffer *const output_buffer) +{ + unsigned char *output_pointer = NULL; + size_t length = 0; + cJSON * current_element = item->child; + + if (output_buffer == NULL) { + return false; + } + + /* Compose the output array. */ + /* opening square bracket */ + output_pointer = ensure(output_buffer, 1); + if (output_pointer == NULL) { + return false; + } + + *output_pointer = '['; + output_buffer->offset++; + output_buffer->depth++; + + while (current_element != NULL) { + if (!print_value(current_element, output_buffer)) { + return false; + } + update_offset(output_buffer); + if (current_element->next) { + length = (size_t)(output_buffer->format ? 2 : 1); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) { + return false; + } + *output_pointer++ = ','; + if (output_buffer->format) { + *output_pointer++ = ' '; + } + *output_pointer = '\0'; + output_buffer->offset += length; + } + current_element = current_element->next; + } + + output_pointer = ensure(output_buffer, 2); + if (output_pointer == NULL) { + return false; + } + *output_pointer++ = ']'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; +} + +/* Build an object from the text. 
*/ +static cJSON_bool +parse_object(cJSON *const item, parse_buffer *const input_buffer) +{ + cJSON *head = NULL; /* linked list head */ + cJSON *current_item = NULL; + + if (input_buffer->depth >= CJSON_NESTING_LIMIT) { + return false; /* to deeply nested */ + } + input_buffer->depth++; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '{')) { + goto fail; /* not an object */ + } + + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == '}')) { + goto success; /* empty object */ + } + + /* check if we skipped to the end of the buffer */ + if (cannot_access_at_index(input_buffer, 0)) { + input_buffer->offset--; + goto fail; + } + + /* step back to character in front of the first element */ + input_buffer->offset--; + /* loop through the comma separated array elements */ + do { + /* allocate next item */ + cJSON *new_item = cJSON_New_Item(&(input_buffer->hooks)); + if (new_item == NULL) { + goto fail; /* allocation failure */ + } + + /* attach next item to list */ + if (head == NULL) { + /* start the linked list */ + current_item = head = new_item; + } + else { + /* add to the end and advance */ + current_item->next = new_item; + new_item->prev = current_item; + current_item = new_item; + } + + /* parse the name of the child */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_string(current_item, input_buffer)) { + goto fail; /* failed to parse name */ + } + buffer_skip_whitespace(input_buffer); + + /* swap valuestring and string, because we parsed the name */ + current_item->string = current_item->valuestring; + current_item->valuestring = NULL; + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != ':')) { + goto fail; /* invalid object */ + } + + /* parse the value */ + input_buffer->offset++; + buffer_skip_whitespace(input_buffer); + if (!parse_value(current_item, 
input_buffer)) { + goto fail; /* failed to parse value */ + } + buffer_skip_whitespace(input_buffer); + } while (can_access_at_index(input_buffer, 0) && (buffer_at_offset(input_buffer)[0] == ',')); + + if (cannot_access_at_index(input_buffer, 0) || (buffer_at_offset(input_buffer)[0] != '}')) { + goto fail; /* expected end of object */ + } + +success: + input_buffer->depth--; + + if (head != NULL) { + head->prev = current_item; + } + + item->type = cJSON_Object; + item->child = head; + + input_buffer->offset++; + return true; + +fail: + if (head != NULL) { + cJSON_Delete(head); + } + + return false; +} + +/* Render an object to text. */ +static cJSON_bool +print_object(const cJSON *const item, printbuffer *const output_buffer) +{ + unsigned char *output_pointer = NULL; + size_t length = 0; + cJSON * current_item = item->child; + + if (output_buffer == NULL) { + return false; + } + + /* Compose the output: */ + length = (size_t)(output_buffer->format ? 2 : 1); /* fmt: {\n */ + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) { + return false; + } + + *output_pointer++ = '{'; + output_buffer->depth++; + if (output_buffer->format) { + *output_pointer++ = '\n'; + } + output_buffer->offset += length; + + while (current_item) { + if (output_buffer->format) { + size_t i; + output_pointer = ensure(output_buffer, output_buffer->depth); + if (output_pointer == NULL) { + return false; + } + for (i = 0; i < output_buffer->depth; i++) { + *output_pointer++ = '\t'; + } + output_buffer->offset += output_buffer->depth; + } + + /* print key */ + if (!print_string_ptr((unsigned char *)current_item->string, output_buffer)) { + return false; + } + update_offset(output_buffer); + + length = (size_t)(output_buffer->format ? 
2 : 1); + output_pointer = ensure(output_buffer, length); + if (output_pointer == NULL) { + return false; + } + *output_pointer++ = ':'; + if (output_buffer->format) { + *output_pointer++ = '\t'; + } + output_buffer->offset += length; + + /* print value */ + if (!print_value(current_item, output_buffer)) { + return false; + } + update_offset(output_buffer); + + /* print comma if not last */ + length = ((size_t)(output_buffer->format ? 1 : 0) + (size_t)(current_item->next ? 1 : 0)); + output_pointer = ensure(output_buffer, length + 1); + if (output_pointer == NULL) { + return false; + } + if (current_item->next) { + *output_pointer++ = ','; + } + + if (output_buffer->format) { + *output_pointer++ = '\n'; + } + *output_pointer = '\0'; + output_buffer->offset += length; + + current_item = current_item->next; + } + + output_pointer = ensure(output_buffer, output_buffer->format ? (output_buffer->depth + 1) : 2); + if (output_pointer == NULL) { + return false; + } + if (output_buffer->format) { + size_t i; + for (i = 0; i < (output_buffer->depth - 1); i++) { + *output_pointer++ = '\t'; + } + } + *output_pointer++ = '}'; + *output_pointer = '\0'; + output_buffer->depth--; + + return true; +} + +/* Get Array size/item / object item. */ +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array) +{ + cJSON *child = NULL; + size_t size = 0; + + if (array == NULL) { + return 0; + } + + child = array->child; + + while (child != NULL) { + size++; + child = child->next; + } + + /* FIXME: Can overflow here. 
Cannot be fixed without breaking the API */ + + return (int)size; +} + +static cJSON * +get_array_item(const cJSON *array, size_t index) +{ + cJSON *current_child = NULL; + + if (array == NULL) { + return NULL; + } + + current_child = array->child; + while ((current_child != NULL) && (index > 0)) { + index--; + current_child = current_child->next; + } + + return current_child; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index) +{ + if (index < 0) { + return NULL; + } + + return get_array_item(array, (size_t)index); +} + +static cJSON * +get_object_item(const cJSON *const object, const char *const name, const cJSON_bool case_sensitive) +{ + cJSON *current_element = NULL; + + if ((object == NULL) || (name == NULL)) { + return NULL; + } + + current_element = object->child; + if (case_sensitive) { + while ((current_element != NULL) && (current_element->string != NULL) && + (strcmp(name, current_element->string) != 0)) { + current_element = current_element->next; + } + } + else { + while ((current_element != NULL) && + (case_insensitive_strcmp((const unsigned char *)name, + (const unsigned char *)(current_element->string)) != 0)) { + current_element = current_element->next; + } + } + + if ((current_element == NULL) || (current_element->string == NULL)) { + return NULL; + } + + return current_element; +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON *const object, const char *const string) +{ + return get_object_item(object, string, false); +} + +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON *const object, const char *const string) +{ + return get_object_item(object, string, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string) +{ + return cJSON_GetObjectItem(object, string) ? 1 : 0; +} + +/* Utility for array list handling. */ +static void +suffix_object(cJSON *prev, cJSON *item) +{ + prev->next = item; + item->prev = prev; +} + +/* Utility for handling references. 
*/ +static cJSON * +create_reference(const cJSON *item, const internal_hooks *const hooks) +{ + cJSON *reference = NULL; + if (item == NULL) { + return NULL; + } + + reference = cJSON_New_Item(hooks); + if (reference == NULL) { + return NULL; + } + + memcpy(reference, item, sizeof(cJSON)); + reference->string = NULL; + reference->type |= cJSON_IsReference; + reference->next = reference->prev = NULL; + return reference; +} + +static cJSON_bool +add_item_to_array(cJSON *array, cJSON *item) +{ + cJSON *child = NULL; + + if ((item == NULL) || (array == NULL) || (array == item)) { + return false; + } + + child = array->child; + /* + * To find the last item in array quickly, we use prev in array + */ + if (child == NULL) { + /* list is empty, start new one */ + array->child = item; + item->prev = item; + item->next = NULL; + } + else { + /* append to the end */ + if (child->prev) { + suffix_object(child->prev, item); + array->child->prev = item; + } + } + + return true; +} + +/* Add item to array/object. 
*/ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item) +{ + return add_item_to_array(array, item); +} + +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic push +#endif +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif +/* helper function to cast away const */ +static void * +cast_away_const(const void *string) +{ + return (void *)string; +} +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) +#pragma GCC diagnostic pop +#endif + +static cJSON_bool +add_item_to_object(cJSON *const object, const char *const string, cJSON *const item, + const internal_hooks *const hooks, const cJSON_bool constant_key) +{ + char *new_key = NULL; + int new_type = cJSON_Invalid; + + if ((object == NULL) || (string == NULL) || (item == NULL) || (object == item)) { + return false; + } + + if (constant_key) { + new_key = (char *)cast_away_const(string); + new_type = item->type | cJSON_StringIsConst; + } + else { + new_key = (char *)cJSON_strdup((const unsigned char *)string, hooks); + if (new_key == NULL) { + return false; + } + + new_type = item->type & ~cJSON_StringIsConst; + } + + if (!(item->type & cJSON_StringIsConst) && (item->string != NULL)) { + hooks->deallocate(item->string); + } + + item->string = new_key; + item->type = new_type; + + return add_item_to_array(object, item); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item) +{ + return add_item_to_object(object, string, item, &global_hooks, false); +} + +/* Add an item to an object with constant string as key */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item) +{ + return add_item_to_object(object, string, item, &global_hooks, true); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) +{ + if (array == NULL) { + 
return false; + } + + return add_item_to_array(array, create_reference(item, &global_hooks)); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item) +{ + if ((object == NULL) || (string == NULL)) { + return false; + } + + return add_item_to_object(object, string, create_reference(item, &global_hooks), &global_hooks, false); +} + +CJSON_PUBLIC(cJSON *) cJSON_AddNullToObject(cJSON *const object, const char *const name) +{ + cJSON *null = cJSON_CreateNull(); + if (add_item_to_object(object, name, null, &global_hooks, false)) { + return null; + } + + cJSON_Delete(null); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_AddTrueToObject(cJSON *const object, const char *const name) +{ + cJSON *true_item = cJSON_CreateTrue(); + if (add_item_to_object(object, name, true_item, &global_hooks, false)) { + return true_item; + } + + cJSON_Delete(true_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_AddFalseToObject(cJSON *const object, const char *const name) +{ + cJSON *false_item = cJSON_CreateFalse(); + if (add_item_to_object(object, name, false_item, &global_hooks, false)) { + return false_item; + } + + cJSON_Delete(false_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) +cJSON_AddBoolToObject(cJSON *const object, const char *const name, const cJSON_bool boolean) +{ + cJSON *bool_item = cJSON_CreateBool(boolean); + if (add_item_to_object(object, name, bool_item, &global_hooks, false)) { + return bool_item; + } + + cJSON_Delete(bool_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) +cJSON_AddNumberToObject(cJSON *const object, const char *const name, const double number) +{ + cJSON *number_item = cJSON_CreateNumber(number); + if (add_item_to_object(object, name, number_item, &global_hooks, false)) { + return number_item; + } + + cJSON_Delete(number_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) +cJSON_AddStringToObject(cJSON *const object, const char *const name, const char *const string) +{ + cJSON *string_item 
= cJSON_CreateString(string); + if (add_item_to_object(object, name, string_item, &global_hooks, false)) { + return string_item; + } + + cJSON_Delete(string_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_AddRawToObject(cJSON *const object, const char *const name, const char *const raw) +{ + cJSON *raw_item = cJSON_CreateRaw(raw); + if (add_item_to_object(object, name, raw_item, &global_hooks, false)) { + return raw_item; + } + + cJSON_Delete(raw_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_AddObjectToObject(cJSON *const object, const char *const name) +{ + cJSON *object_item = cJSON_CreateObject(); + if (add_item_to_object(object, name, object_item, &global_hooks, false)) { + return object_item; + } + + cJSON_Delete(object_item); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_AddArrayToObject(cJSON *const object, const char *const name) +{ + cJSON *array = cJSON_CreateArray(); + if (add_item_to_object(object, name, array, &global_hooks, false)) { + return array; + } + + cJSON_Delete(array); + return NULL; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON *const item) +{ + if ((parent == NULL) || (item == NULL)) { + return NULL; + } + + if (item != parent->child) { + /* not the first element */ + item->prev->next = item->next; + } + if (item->next != NULL) { + /* not the last element */ + item->next->prev = item->prev; + } + + if (item == parent->child) { + /* first element */ + parent->child = item->next; + } + else if (item->next == NULL) { + /* last element */ + parent->child->prev = item->prev; + } + + /* make sure the detached item doesn't point anywhere anymore */ + item->prev = NULL; + item->next = NULL; + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which) +{ + if (which < 0) { + return NULL; + } + + return cJSON_DetachItemViaPointer(array, get_array_item(array, (size_t)which)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which) +{ + 
cJSON_Delete(cJSON_DetachItemFromArray(array, which)); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItem(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON *to_detach = cJSON_GetObjectItemCaseSensitive(object, string); + + return cJSON_DetachItemViaPointer(object, to_detach); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObject(object, string)); +} + +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string) +{ + cJSON_Delete(cJSON_DetachItemFromObjectCaseSensitive(object, string)); +} + +/* Replace array/object items with new ones. */ +CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON *newitem) +{ + cJSON *after_inserted = NULL; + + if (which < 0 || newitem == NULL) { + return false; + } + + after_inserted = get_array_item(array, (size_t)which); + if (after_inserted == NULL) { + return add_item_to_array(array, newitem); + } + + if (after_inserted != array->child && after_inserted->prev == NULL) { + /* return false if after_inserted is a corrupted array item */ + return false; + } + + newitem->next = after_inserted; + newitem->prev = after_inserted->prev; + after_inserted->prev = newitem; + if (after_inserted == array->child) { + array->child = newitem; + } + else { + newitem->prev->next = newitem; + } + return true; +} + +CJSON_PUBLIC(cJSON_bool) +cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item, cJSON *replacement) +{ + if ((parent == NULL) || (parent->child == NULL) || (replacement == NULL) || (item == NULL)) { + return false; + } + + if (replacement == item) { + return true; + } + + replacement->next = item->next; + replacement->prev = item->prev; + + if (replacement->next != NULL) { 
+ replacement->next->prev = replacement; + } + if (parent->child == item) { + if (parent->child->prev == parent->child) { + replacement->prev = replacement; + } + parent->child = replacement; + } + else { /* + * To find the last item in array quickly, we use prev in array. + * We can't modify the last item's next pointer where this item was the parent's child + */ + if (replacement->prev != NULL) { + replacement->prev->next = replacement; + } + if (replacement->next == NULL) { + parent->child->prev = replacement; + } + } + + item->next = NULL; + item->prev = NULL; + cJSON_Delete(item); + + return true; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem) +{ + if (which < 0) { + return false; + } + + return cJSON_ReplaceItemViaPointer(array, get_array_item(array, (size_t)which), newitem); +} + +static cJSON_bool +replace_item_in_object(cJSON *object, const char *string, cJSON *replacement, cJSON_bool case_sensitive) +{ + if ((replacement == NULL) || (string == NULL)) { + return false; + } + + /* replace the name in the replacement */ + if (!(replacement->type & cJSON_StringIsConst) && (replacement->string != NULL)) { + cJSON_free(replacement->string); + } + replacement->string = (char *)cJSON_strdup((const unsigned char *)string, &global_hooks); + if (replacement->string == NULL) { + return false; + } + + replacement->type &= ~cJSON_StringIsConst; + + return cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement); +} + +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem) +{ + return replace_item_in_object(object, string, newitem, false); +} + +CJSON_PUBLIC(cJSON_bool) +cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem) +{ + return replace_item_in_object(object, string, newitem, true); +} + +/* Create basic types: */ +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void) +{ + cJSON *item = 
cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_NULL; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_True; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = boolean ? cJSON_True : cJSON_False; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_Number; + item->valuedouble = num; + + /* use saturation in case of overflow */ + if (num >= INT_MAX) { + item->valueint = INT_MAX; + } + else if (num <= (double)INT_MIN) { + item->valueint = INT_MIN; + } + else { + item->valueint = (int)num; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_String; + item->valuestring = (char *)cJSON_strdup((const unsigned char *)string, &global_hooks); + if (!item->valuestring) { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_String | cJSON_IsReference; + item->valuestring = (char *)cast_away_const(string); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Object | cJSON_IsReference; + item->child = (cJSON *)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child) +{ + cJSON 
*item = cJSON_New_Item(&global_hooks); + if (item != NULL) { + item->type = cJSON_Array | cJSON_IsReference; + item->child = (cJSON *)cast_away_const(child); + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_Raw; + item->valuestring = (char *)cJSON_strdup((const unsigned char *)raw, &global_hooks); + if (!item->valuestring) { + cJSON_Delete(item); + return NULL; + } + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_Array; + } + + return item; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void) +{ + cJSON *item = cJSON_New_Item(&global_hooks); + if (item) { + item->type = cJSON_Object; + } + + return item; +} + +/* Create Arrays: */ +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) { + n = cJSON_CreateNumber(numbers[i]); + if (!n) { + cJSON_Delete(a); + return NULL; + } + if (!i) { + a->child = n; + } + else { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) { + n = cJSON_CreateNumber((double)numbers[i]); + if (!n) { + cJSON_Delete(a); + return NULL; + } + if (!i) { + a->child = n; + } + else { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double 
*numbers, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (numbers == NULL)) { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) { + n = cJSON_CreateNumber(numbers[i]); + if (!n) { + cJSON_Delete(a); + return NULL; + } + if (!i) { + a->child = n; + } + else { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char *const *strings, int count) +{ + size_t i = 0; + cJSON *n = NULL; + cJSON *p = NULL; + cJSON *a = NULL; + + if ((count < 0) || (strings == NULL)) { + return NULL; + } + + a = cJSON_CreateArray(); + + for (i = 0; a && (i < (size_t)count); i++) { + n = cJSON_CreateString(strings[i]); + if (!n) { + cJSON_Delete(a); + return NULL; + } + if (!i) { + a->child = n; + } + else { + suffix_object(p, n); + } + p = n; + } + + if (a && a->child) { + a->child->prev = n; + } + + return a; +} + +/* Duplication */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse) +{ + cJSON *newitem = NULL; + cJSON *child = NULL; + cJSON *next = NULL; + cJSON *newchild = NULL; + + /* Bail on bad ptr */ + if (!item) { + goto fail; + } + /* Create new item */ + newitem = cJSON_New_Item(&global_hooks); + if (!newitem) { + goto fail; + } + /* Copy over all vars */ + newitem->type = item->type & (~cJSON_IsReference); + newitem->valueint = item->valueint; + newitem->valuedouble = item->valuedouble; + if (item->valuestring) { + newitem->valuestring = (char *)cJSON_strdup((unsigned char *)item->valuestring, &global_hooks); + if (!newitem->valuestring) { + goto fail; + } + } + if (item->string) { + newitem->string = (item->type & cJSON_StringIsConst) + ? item->string + : (char *)cJSON_strdup((unsigned char *)item->string, &global_hooks); + if (!newitem->string) { + goto fail; + } + } + /* If non-recursive, then we're done! 
*/ + if (!recurse) { + return newitem; + } + /* Walk the ->next chain for the child. */ + child = item->child; + while (child != NULL) { + newchild = cJSON_Duplicate(child, true); /* Duplicate (with recurse) each item in the ->next chain */ + if (!newchild) { + goto fail; + } + if (next != NULL) { + /* If newitem->child already set, then crosswire ->prev and ->next and move on */ + next->next = newchild; + newchild->prev = next; + next = newchild; + } + else { + /* Set newitem->child and move to it */ + newitem->child = newchild; + next = newchild; + } + child = child->next; + } + if (newitem && newitem->child) { + newitem->child->prev = newchild; + } + + return newitem; + +fail: + if (newitem != NULL) { + cJSON_Delete(newitem); + } + + return NULL; +} + +static void +skip_oneline_comment(char **input) +{ + *input += static_strlen("//"); + + for (; (*input)[0] != '\0'; ++(*input)) { + if ((*input)[0] == '\n') { + *input += static_strlen("\n"); + return; + } + } +} + +static void +skip_multiline_comment(char **input) +{ + *input += static_strlen("/*"); + + for (; (*input)[0] != '\0'; ++(*input)) { + if (((*input)[0] == '*') && ((*input)[1] == '/')) { + *input += static_strlen("*/"); + return; + } + } +} + +static void +minify_string(char **input, char **output) +{ + (*output)[0] = (*input)[0]; + *input += static_strlen("\""); + *output += static_strlen("\""); + + for (; (*input)[0] != '\0'; (void)++(*input), ++(*output)) { + (*output)[0] = (*input)[0]; + + if ((*input)[0] == '\"') { + (*output)[0] = '\"'; + *input += static_strlen("\""); + *output += static_strlen("\""); + return; + } + else if (((*input)[0] == '\\') && ((*input)[1] == '\"')) { + (*output)[1] = (*input)[1]; + *input += static_strlen("\""); + *output += static_strlen("\""); + } + } +} + +CJSON_PUBLIC(void) cJSON_Minify(char *json) +{ + char *into = json; + + if (json == NULL) { + return; + } + + while (json[0] != '\0') { + switch (json[0]) { + case ' ': + case '\t': + case '\r': + case '\n': + 
json++; + break; + + case '/': + if (json[1] == '/') { + skip_oneline_comment(&json); + } + else if (json[1] == '*') { + skip_multiline_comment(&json); + } + else { + json++; + } + break; + + case '\"': + minify_string(&json, (char **)&into); + break; + + default: + into[0] = json[0]; + json++; + into++; + } + } + + /* and null-terminate. */ + *into = '\0'; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_Invalid; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_False; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xff) == cJSON_True; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & (cJSON_True | cJSON_False)) != 0; +} +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_NULL; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_Number; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_String; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_Array; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_Object; +} + +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON *const item) +{ + if (item == NULL) { + return false; + } + + return (item->type & 0xFF) == cJSON_Raw; +} + 
+CJSON_PUBLIC(cJSON_bool) /* Recursively compares a and b: true only when both are non-NULL, have the same valid type in the low 8 bits of ->type, and hold equal content; case_sensitive is forwarded to get_object_item for object key lookups. */ +cJSON_Compare(const cJSON *const a, const cJSON *const b, const cJSON_bool case_sensitive) +{ + if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF))) { + return false; + } + + /* check if type is valid: any value outside the eight cases listed below compares unequal */ + switch (a->type & 0xFF) { + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + case cJSON_Number: + case cJSON_String: + case cJSON_Raw: + case cJSON_Array: + case cJSON_Object: + break; + + default: + return false; + } + + /* identical objects are equal (same pointer, type already validated above) */ + if (a == b) { + return true; + } + + switch (a->type & 0xFF) { + /* in these cases an equal type is enough */ + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + return true; + + case cJSON_Number: + if (compare_double(a->valuedouble, b->valuedouble)) { + return true; + } + return false; + + case cJSON_String: + case cJSON_Raw: + if ((a->valuestring == NULL) || (b->valuestring == NULL)) { + return false; + } + if (strcmp(a->valuestring, b->valuestring) == 0) { + return true; + } + + return false; + + case cJSON_Array: { + cJSON *a_element = a->child; + cJSON *b_element = b->child; + + for (; (a_element != NULL) && (b_element != NULL);) { + if (!cJSON_Compare(a_element, b_element, case_sensitive)) { + return false; + } + + a_element = a_element->next; + b_element = b_element->next; + } + + /* after the loop at least one pointer is NULL; they are equal only if both are, otherwise one of the arrays is longer than the other */ + if (a_element != b_element) { + return false; + } + + return true; + } + + case cJSON_Object: { + cJSON *a_element = NULL; + cJSON *b_element = NULL; + cJSON_ArrayForEach(a_element, a) + { + /* TODO This has O(n^2) runtime, which is horrible! 
*/ + b_element = get_object_item(b, a_element->string, case_sensitive); + if (b_element == NULL) { + return false; + } + + if (!cJSON_Compare(a_element, b_element, case_sensitive)) { + return false; + } + } + + /* doing this twice, once on a and once on b, to prevent a true comparison if a is a subset of b + * TODO: Do this the proper way, this is just a fix for now */ + cJSON_ArrayForEach(b_element, b) + { + a_element = get_object_item(a, b_element->string, case_sensitive); + if (a_element == NULL) { + return false; + } + + if (!cJSON_Compare(b_element, a_element, case_sensitive)) { + return false; + } + } + + return true; + } + + default: + return false; + } +} + +CJSON_PUBLIC(void *) cJSON_malloc(size_t size) /* allocates size bytes through the allocate hook in global_hooks */ +{ + return global_hooks.allocate(size); +} + +CJSON_PUBLIC(void) cJSON_free(void *object) /* releases object through the deallocate hook in global_hooks */ +{ + global_hooks.deallocate(object); +} \ No newline at end of file diff --git a/benchmark/idioms/cjson/cJSON.h b/benchmark/idioms/cjson/cJSON.h new file mode 100644 index 000000000..f59532532 --- /dev/null +++ b/benchmark/idioms/cjson/cJSON.h @@ -0,0 +1,329 @@ +/* + Copyright (c) 2009-2017 Dave Gamble and cJSON contributors + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifndef cJSON__h +#define cJSON__h + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(__WINDOWS__) && (defined(WIN32) || defined(WIN64) || defined(_MSC_VER) || defined(_WIN32)) +#define __WINDOWS__ +#endif + +#ifdef __WINDOWS__ + +/* When compiling for windows, we specify a specific calling convention to avoid issues where we are being +called from a project with a different default calling convention. For windows you have 3 define options: + +CJSON_HIDE_SYMBOLS - Define this in the case where you don't want to ever dllexport symbols +CJSON_EXPORT_SYMBOLS - Define this on library build when you want to dllexport symbols (default) +CJSON_IMPORT_SYMBOLS - Define this if you want to dllimport symbol + +For *nix builds that support visibility attribute, you can define similar behavior by + +setting default visibility to hidden by adding +-fvisibility=hidden (for gcc) +or +-xldscope=hidden (for sun cc) +to CFLAGS + +then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJSON_EXPORT_SYMBOLS does + +*/ + +#define CJSON_CDECL __cdecl +#define CJSON_STDCALL __stdcall + +/* export symbols by default, this is necessary for copy pasting the C and header file */ +#if !defined(CJSON_HIDE_SYMBOLS) && !defined(CJSON_IMPORT_SYMBOLS) && !defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_EXPORT_SYMBOLS +#endif + +#if defined(CJSON_HIDE_SYMBOLS) +#define CJSON_PUBLIC(type) type CJSON_STDCALL +#elif defined(CJSON_EXPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllexport) type CJSON_STDCALL +#elif defined(CJSON_IMPORT_SYMBOLS) +#define CJSON_PUBLIC(type) __declspec(dllimport) type CJSON_STDCALL +#endif +#else /* !__WINDOWS__ */ +#define CJSON_CDECL +#define CJSON_STDCALL + +#if 
(defined(__GNUC__) || defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(CJSON_API_VISIBILITY) +#define CJSON_PUBLIC(type) __attribute__((visibility("default"))) type +#else +#define CJSON_PUBLIC(type) type +#endif +#endif + +/* project version */ +#define CJSON_VERSION_MAJOR 1 +#define CJSON_VERSION_MINOR 7 +#define CJSON_VERSION_PATCH 17 + +#include + +/* cJSON Types: */ +#define cJSON_Invalid (0) +#define cJSON_False (1 << 0) +#define cJSON_True (1 << 1) +#define cJSON_NULL (1 << 2) +#define cJSON_Number (1 << 3) +#define cJSON_String (1 << 4) +#define cJSON_Array (1 << 5) +#define cJSON_Object (1 << 6) +#define cJSON_Raw (1 << 7) /* raw json */ + +#define cJSON_IsReference 256 +#define cJSON_StringIsConst 512 + +/* The cJSON structure: */ +typedef struct cJSON { + /* next/prev allow you to walk array/object chains. Alternatively, use + * GetArraySize/GetArrayItem/GetObjectItem */ + struct cJSON *next; + struct cJSON *prev; + /* An array or object item will have a child pointer pointing to a chain of the items in the array/object. + */ + struct cJSON *child; + + /* The type of the item, as above. */ + int type; + + /* The item's string, if type==cJSON_String and type == cJSON_Raw */ + char *valuestring; + /* writing to valueint is DEPRECATED, use cJSON_SetNumberValue instead */ + int valueint; + /* The item's number, if type==cJSON_Number */ + double valuedouble; + + /* The item's name string, if this item is the child of, or is in the list of subitems of an object. */ + char *string; +} cJSON; + +typedef struct cJSON_Hooks { + /* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so + * ensure the hooks allow passing those functions directly. */ + void *(CJSON_CDECL *malloc_fn)(size_t sz); + void(CJSON_CDECL *free_fn)(void *ptr); +} cJSON_Hooks; + +typedef int cJSON_bool; + +/* Limits how deeply nested arrays/objects can be before cJSON rejects to parse them. + * This is to prevent stack overflows. 
*/ +#ifndef CJSON_NESTING_LIMIT +#define CJSON_NESTING_LIMIT 1000 +#endif + +/* returns the version of cJSON as a string */ +CJSON_PUBLIC(const char *) cJSON_Version(void); + +/* Supply malloc, realloc and free functions to cJSON */ +CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks *hooks); + +/* Memory Management: the caller is always responsible to free the results from all variants of cJSON_Parse + * (with cJSON_Delete) and cJSON_Print (with stdlib free, cJSON_Hooks.free_fn, or cJSON_free as appropriate). + * The exception is cJSON_PrintPreallocated, where the caller has full responsibility of the buffer. */ +/* Supply a block of JSON, and this returns a cJSON object you can interrogate. */ +CJSON_PUBLIC(cJSON *) cJSON_Parse(const char *value); +CJSON_PUBLIC(cJSON *) cJSON_ParseWithLength(const char *value, size_t buffer_length); +/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the + * pointer to the final byte parsed. */ +/* If you supply a ptr in return_parse_end and parsing fails, then return_parse_end will contain a pointer to + * the error so will match cJSON_GetErrorPtr(). */ +CJSON_PUBLIC(cJSON *) +cJSON_ParseWithOpts(const char *value, const char **return_parse_end, cJSON_bool require_null_terminated); +CJSON_PUBLIC(cJSON *) +cJSON_ParseWithLengthOpts(const char *value, size_t buffer_length, const char **return_parse_end, + cJSON_bool require_null_terminated); + +/* Render a cJSON entity to text for transfer/storage. */ +CJSON_PUBLIC(char *) cJSON_Print(const cJSON *item); +/* Render a cJSON entity to text for transfer/storage without any formatting. */ +CJSON_PUBLIC(char *) cJSON_PrintUnformatted(const cJSON *item); +/* Render a cJSON entity to text using a buffered strategy. prebuffer is a guess at the final size. guessing + * well reduces reallocation. 
fmt=0 gives unformatted, =1 gives formatted */ +CJSON_PUBLIC(char *) cJSON_PrintBuffered(const cJSON *item, int prebuffer, cJSON_bool fmt); +/* Render a cJSON entity to text using a buffer already allocated in memory with given length. Returns 1 on + * success and 0 on failure. */ +/* NOTE: cJSON is not always 100% accurate in estimating how much memory it will use, so to be safe allocate 5 + * bytes more than you actually need */ +CJSON_PUBLIC(cJSON_bool) +cJSON_PrintPreallocated(cJSON *item, char *buffer, const int length, const cJSON_bool format); +/* Delete a cJSON entity and all subentities. */ +CJSON_PUBLIC(void) cJSON_Delete(cJSON *item); + +/* Returns the number of items in an array (or object). */ +CJSON_PUBLIC(int) cJSON_GetArraySize(const cJSON *array); +/* Retrieve item number "index" from array "array". Returns NULL if unsuccessful. */ +CJSON_PUBLIC(cJSON *) cJSON_GetArrayItem(const cJSON *array, int index); +/* Get item "string" from object. Case insensitive. */ +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItem(const cJSON *const object, const char *const string); +CJSON_PUBLIC(cJSON *) cJSON_GetObjectItemCaseSensitive(const cJSON *const object, const char *const string); +CJSON_PUBLIC(cJSON_bool) cJSON_HasObjectItem(const cJSON *object, const char *string); +/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few + * chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. 
*/ +CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void); + +/* Check item type and return its value */ +CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON *const item); +CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON *const item); + +/* These functions check the type of an item */ +CJSON_PUBLIC(cJSON_bool) cJSON_IsInvalid(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsFalse(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsTrue(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsBool(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsNull(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsNumber(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsString(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsArray(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsObject(const cJSON *const item); +CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON *const item); + +/* These calls create a cJSON item of the appropriate type. */ +CJSON_PUBLIC(cJSON *) cJSON_CreateNull(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateTrue(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateFalse(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateBool(cJSON_bool boolean); +CJSON_PUBLIC(cJSON *) cJSON_CreateNumber(double num); +CJSON_PUBLIC(cJSON *) cJSON_CreateString(const char *string); +/* raw json */ +CJSON_PUBLIC(cJSON *) cJSON_CreateRaw(const char *raw); +CJSON_PUBLIC(cJSON *) cJSON_CreateArray(void); +CJSON_PUBLIC(cJSON *) cJSON_CreateObject(void); + +/* Create a string where valuestring references a string so + * it will not be freed by cJSON_Delete */ +CJSON_PUBLIC(cJSON *) cJSON_CreateStringReference(const char *string); +/* Create an object/array that only references it's elements so + * they will not be freed by cJSON_Delete */ +CJSON_PUBLIC(cJSON *) cJSON_CreateObjectReference(const cJSON *child); +CJSON_PUBLIC(cJSON *) cJSON_CreateArrayReference(const cJSON *child); + +/* These utilities create an Array of count items. 
+ * The parameter count cannot be greater than the number of elements in the number array, otherwise array + * access will be out of bounds.*/ +CJSON_PUBLIC(cJSON *) cJSON_CreateIntArray(const int *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateFloatArray(const float *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count); +CJSON_PUBLIC(cJSON *) cJSON_CreateStringArray(const char *const *strings, int count); + +/* Append item to the specified array/object. */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToArray(cJSON *array, cJSON *item); +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObject(cJSON *object, const char *string, cJSON *item); +/* Use this when string is definitely const (i.e. a literal, or as good as), and will definitely survive the + * cJSON object. WARNING: When this function was used, make sure to always check that (item->type & + * cJSON_StringIsConst) is zero before writing to `item->string` */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemToObjectCS(cJSON *object, const char *string, cJSON *item); +/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to + * a new cJSON, but don't want to corrupt your existing cJSON. */ +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item); +CJSON_PUBLIC(cJSON_bool) cJSON_AddItemReferenceToObject(cJSON *object, const char *string, cJSON *item); + +/* Remove/Detach items from Arrays/Objects. 
*/ +CJSON_PUBLIC(cJSON *) cJSON_DetachItemViaPointer(cJSON *parent, cJSON *const item); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromArray(cJSON *array, int which); +CJSON_PUBLIC(void) cJSON_DeleteItemFromArray(cJSON *array, int which); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObject(cJSON *object, const char *string); +CJSON_PUBLIC(cJSON *) cJSON_DetachItemFromObjectCaseSensitive(cJSON *object, const char *string); +CJSON_PUBLIC(void) cJSON_DeleteItemFromObject(cJSON *object, const char *string); +CJSON_PUBLIC(void) cJSON_DeleteItemFromObjectCaseSensitive(cJSON *object, const char *string); + +/* Update array items. */ +CJSON_PUBLIC(cJSON_bool) +cJSON_InsertItemInArray(cJSON *array, int which, + cJSON *newitem); /* Shifts pre-existing items to the right. */ +CJSON_PUBLIC(cJSON_bool) +cJSON_ReplaceItemViaPointer(cJSON *const parent, cJSON *const item, cJSON *replacement); +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInArray(cJSON *array, int which, cJSON *newitem); +CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemInObject(cJSON *object, const char *string, cJSON *newitem); +CJSON_PUBLIC(cJSON_bool) +cJSON_ReplaceItemInObjectCaseSensitive(cJSON *object, const char *string, cJSON *newitem); + +/* Duplicate a cJSON item */ +CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse); +/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will + * need to be released. With recurse!=0, it will duplicate any children connected to the item. + * The item->next and ->prev pointers are always zero on return from Duplicate. */ +/* Recursively compare two cJSON items for equality. If either a or b is NULL or invalid, they will be + * considered unequal. 
case_sensitive determines if object keys are treated case sensitive (1) or case + * insensitive (0) */ +CJSON_PUBLIC(cJSON_bool) +cJSON_Compare(const cJSON *const a, const cJSON *const b, const cJSON_bool case_sensitive); + +/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') from strings. + * The input pointer json cannot point to a read-only address area, such as a string constant, + * but should point to a readable and writable address area. */ +CJSON_PUBLIC(void) cJSON_Minify(char *json); + +/* Helper functions for creating and adding items to an object at the same time. + * They return the added item or NULL on failure. */ +CJSON_PUBLIC(cJSON *) cJSON_AddNullToObject(cJSON *const object, const char *const name); +CJSON_PUBLIC(cJSON *) cJSON_AddTrueToObject(cJSON *const object, const char *const name); +CJSON_PUBLIC(cJSON *) cJSON_AddFalseToObject(cJSON *const object, const char *const name); +CJSON_PUBLIC(cJSON *) +cJSON_AddBoolToObject(cJSON *const object, const char *const name, const cJSON_bool boolean); +CJSON_PUBLIC(cJSON *) +cJSON_AddNumberToObject(cJSON *const object, const char *const name, const double number); +CJSON_PUBLIC(cJSON *) +cJSON_AddStringToObject(cJSON *const object, const char *const name, const char *const string); +CJSON_PUBLIC(cJSON *) +cJSON_AddRawToObject(cJSON *const object, const char *const name, const char *const raw); +CJSON_PUBLIC(cJSON *) cJSON_AddObjectToObject(cJSON *const object, const char *const name); +CJSON_PUBLIC(cJSON *) cJSON_AddArrayToObject(cJSON *const object, const char *const name); + +/* When assigning an integer value, it needs to be propagated to valuedouble too. */ +#define cJSON_SetIntValue(object, number) \ + ((object) ? (object)->valueint = (object)->valuedouble = (number) : (number)) +/* helper for the cJSON_SetNumberValue macro */ +CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number); +#define cJSON_SetNumberValue(object, number) \ + ((object != NULL) ? 
cJSON_SetNumberHelper(object, (double)number) : (number)) +/* Change the valuestring of a cJSON_String object, only takes effect when type of object is cJSON_String */ +CJSON_PUBLIC(char *) cJSON_SetValuestring(cJSON *object, const char *valuestring); + +/* If the object is not a boolean type this does nothing and returns cJSON_Invalid else it returns the new + * type*/ +#define cJSON_SetBoolValue(object, boolValue) \ + ((object != NULL && ((object)->type & (cJSON_False | cJSON_True))) \ + ? (object)->type = \ + ((object)->type & (~(cJSON_False | cJSON_True))) | ((boolValue) ? cJSON_True : cJSON_False) \ + : cJSON_Invalid) + +/* Macro for iterating over an array or object */ +#define cJSON_ArrayForEach(element, array) \ + for (element = (array != NULL) ? (array)->child : NULL; element != NULL; element = element->next) + +/* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */ +CJSON_PUBLIC(void *) cJSON_malloc(size_t size); +CJSON_PUBLIC(void) cJSON_free(void *object); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/benchmark/idioms/llsm_metadata_bench.c b/benchmark/idioms/llsm_metadata_bench.c new file mode 100644 index 000000000..44e47a674 --- /dev/null +++ b/benchmark/idioms/llsm_metadata_bench.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include +#include +#include "cjson/cJSON.h" +#include +#include +#include "fs/fs_ops.h" +#include "string_utils.h" +#include "timer_utils.h" + +#ifdef ENABLE_MPI +#include "mpi.h" +#endif + +#include "pdc.h" + +const char *LLSM_query_templates[] = { // query format templates (%d/%s placeholders) for the LLSM metadata; declared as an array so every entry is kept, and terminated by an empty string + "Scan_Iter=%d AND Cam=%s AND Ch=%d AND stackn=%d AND laser_nm=%d AND " + "abstime=%d AND fpgatime=%d AND x_str=%d AND y_str=%d AND z_str=%d AND " + "t_str=%d", // locating a particular + // object with a + // particular set of + // metadata attributes (for the purpose of studying AND operator) + "Scan_Iter=%d AND (Cam=A OR Cam=B) AND Ch=%d AND stackn=%d AND " + "laser_nm=%d AND " + "abstime=%d AND 
fpgatime=%d AND x_str=%d AND y_str = %d " + "AND z_str=%d AND t_str=%d", // locating all objects + // collected in both + // camera A and B such + // that these objects + // also match with the + // given set of other + // attributes, and in particular, + // with y_str matching either 002 or 004 (for the purpose of studying OR + // operator) NOTE(review): this template ORs Cam=A / Cam=B and takes a single y_str value -- confirm which is intended + "Scan_Iter=%d AND Cam=%s AND Ch=%d AND stackn=%d AND laser_nm=%d AND " + "abstime=%d AND fpgatime=%d AND x_str=%d AND NOT (y_str=002 OR y_str=004) AND z_str=%d AND " + "t_str=%d", // locating all objects with a particular set of metadata attributes, and in particular, with + // y_str not matching either 002 or 004 (for the purpose of studying negation query) + ""}; + +const char *BOSS_query_templates[] = { // range-query format templates over ra/dec (sky regions); declared as an array so every entry is kept, and terminated by an empty string + "ra > %.4f AND ra < %.4f AND dec > %.4f AND dec < %.4f", // locating objects in a sky region + "( ra > %.4f AND ra < %.4f AND dec > %.4f AND dec < %.4f ) OR (ra > %.4f " + "AND ra < %.4f AND dec > %.4f AND dec < %.4f)", // locating objects in two sky regions + "NOT (( ra > %.4f AND ra < %.4f AND dec > %.4f AND dec < %.4f ) OR (ra > %.4f " + "AND ra < %.4f AND dec > %.4f AND dec < %.4f))", // locating objects outside two sky regions + ""}; + +/** + * locating an object by a list of predicates that match all metadata attributes (predicates connected + * only by AND). + */ +void +perform_object_locating_search(void) +{ + // Q. shall we change the parameter of the query everytime? There is no cache for metadata search + // currently + // P. Run this query for 100 times and report the histogram or breakdown of the time spent on each query + // 16, 32, 64, 128 servers, 120:1 C/S ratio + // Overall Measure -> timing of each query v.s. throughput + // Breakdown -> server time, client time, network time. + // +} + +/** + * perform a search for objects that match a set of metadata attributes, with some of them matching + * different specified values (OR involved). 
+ */ +void +perform_object_selecting_search_with_OR(void) +{ +} + +/** + * perform a search for objects that match a set of metadata attributes, with some of them not matching + * specified values (NOT involved). + */ +void +perform_object_selecting_search_with_NOT(void) +{ +} \ No newline at end of file diff --git a/examples/llsm/CMakeLists.txt b/examples/llsm/CMakeLists.txt index 95923865a..33d343230 100644 --- a/examples/llsm/CMakeLists.txt +++ b/examples/llsm/CMakeLists.txt @@ -37,7 +37,7 @@ if(PDC_FOUND) set(LLSM_EXT_LIBRARIES pdc ${LLSM_EXT_LIBRARIES}) endif() -option(USE_SYSTEM_MPI "Use system-installed OpenMP." ON) +option(USE_SYSTEM_MPI "Use system-installed MPI." ON) if(USE_SYSTEM_MPI) find_package(MPI) if(MPI_FOUND) diff --git a/examples/llsm/llsm_importer.c b/examples/llsm/llsm_importer.c index 9f6c0e043..fef6c0e54 100644 --- a/examples/llsm/llsm_importer.c +++ b/examples/llsm/llsm_importer.c @@ -36,18 +36,22 @@ getDoubleTimestamp() } int -parse_console_args(int argc, char *argv[], char **file_name) +parse_console_args(int argc, char *argv[], char **file_name, int *flag_m) { int c, parse_code = -1; - while ((c = getopt(argc, argv, "f:")) != -1) { + while ((c = getopt(argc, argv, "f:m")) != -1) { switch (c) { case 'f': *file_name = optarg; parse_code = 0; break; + case 'm': + *flag_m = 1; + parse_code = 0; + break; default: - fprintf(stderr, "Usage: %s [-f filename]\n", argv[0]); + fprintf(stderr, "Usage: %s [-m] [-f filename]\n", argv[0]); parse_code = -1; exit(EXIT_FAILURE); } @@ -273,8 +277,9 @@ main(int argc, char *argv[]) int bcast_count = 512; double duration = 0, start = 0; char csv_field_types[] = {'s', 's', 'f', 'f', 'f', 'f', 'f', 'f'}; + int flag_m = 0; // flag for metadata. if 1, then only read metadata from CSV without reading images. 
// parse console argument - int parse_code = parse_console_args(argc, argv, &file_name); + int parse_code = parse_console_args(argc, argv, &file_name, &flag_m); if (parse_code) { return parse_code; } diff --git a/scripts/kvtag_query_scale_mpi/clean.sh b/scripts/kvtag_affix_query_scale/clean.sh similarity index 100% rename from scripts/kvtag_query_scale_mpi/clean.sh rename to scripts/kvtag_affix_query_scale/clean.sh diff --git a/scripts/kvtag_affix_query_scale/gen_script.sh b/scripts/kvtag_affix_query_scale/gen_script.sh new file mode 100755 index 000000000..05eca6eb3 --- /dev/null +++ b/scripts/kvtag_affix_query_scale/gen_script.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +if [[ "$#" -ne 2 ]]; then + echo "Usage: $0 <min_node> <proj_name>" + exit 1 +fi + +# test if $1 is a number +re='^[0-9]+$' +if ! [[ $1 =~ $re ]] ; then + echo "Error: min_node is not a number" >&2; exit 1 +fi + +if [[ "$1" -lt "1" ]]; then + echo "Error: min_node should be larger than 0" + exit 1 +fi + +if [[ "$1" -gt "512" ]]; then + echo "Error: min_node should be no larger than 512" + exit 1 +fi + +MIN_NODE=$1 +MAX_NODE=512 +MAX_ATTR=1024 +MAX_ATTRLEN=1000 + + + +# Per node configuration of your HPC system. +MAX_PYSICAL_CORE=128 +MAX_HYPERTHREADING=2 + +# Designated number of threads per process on each node +# (this should be associated with -c option in srun) +NUM_THREAD_PER_SERVER_PROC=2 +NUM_THREAD_PER_CLIENT_PROC=2 + +# test if $2 is a string starting with a letter, and followed by a 4-digit number +re='^[a-zA-Z][0-9]{4}$' +if ! [[ $2 =~ $re ]] ; then + echo "Error: proj_name should be a string starting with a letter, and followed by a 4-digit number, e.g. 
m2021" >&2; exit 1 +fi + +PROJECT_NAME=$2 + + +# Designated number of processes for server anc client on each node +# (this should be associated with -n option in srun) +TOTAL_NUM_CLIENT_PROC=$((128 * MIN_NODE)) +NUM_SERVER_PROC_PER_NODE=1 +NUM_CLIENT_PROC_PER_NODE=$((TOTAL_NUM_CLIENT_PROC)) + + +PROG_BASENAME=kvafxqry + +for (( i = $MIN_NODE; i <= $MAX_NODE; i*=2 )); do + mkdir -p $i + NUM_CLIENT_PROC_PER_NODE=$((TOTAL_NUM_CLIENT_PROC/i)) + AVAIL_CLIENT_THREAD_CORES=$((MAX_PYSICAL_CORE * MAX_HYPERTHREADING - NUM_SERVER_PROC_PER_NODE * NUM_THREAD_PER_SERVER_PROC)) + AVAIL_CLIENT_PHYSICAL_CORES=$((AVAIL_CLIENT_THREAD_CORES / NUM_THREAD_PER_CLIENT_PROC)) + if [[ $(( NUM_CLIENT_PROC_PER_NODE > AVAIL_CLIENT_PHYSICAL_CORES )) -eq 1 ]]; then + NUM_CLIENT_PROC_PER_NODE=$((AVAIL_CLIENT_PHYSICAL_CORES - 2)) + fi + for (( j = 0; j <= 1; j+=1 )); do + for (( q = 0; q < 4; q+=1 )); do + for (( c = 0; c < 2; c+=1 )); do + JOBNAME=${PROG_BASENAME}_${i}_${j}_${q}_${c} + TARGET=./$i/$JOBNAME.sbatch + cp template.sh $TARGET + sed -i "s/JOBNAME/${JOBNAME}/g" $TARGET + sed -i "s/NODENUM/${i}/g" $TARGET + sed -i "s/MPHYSICALCORE/${MAX_PYSICAL_CORE}/g" $TARGET + sed -i "s/MHYPERTHREADING/${MAX_HYPERTHREADING}/g" $TARGET + sed -i "s/N_SERVER_PROC/${NUM_SERVER_PROC_PER_NODE}/g" $TARGET + sed -i "s/N_CLIENT_PROC/${NUM_CLIENT_PROC_PER_NODE}/g" $TARGET + sed -i "s/NTHREAD_PER_SPROC/${NUM_THREAD_PER_SERVER_PROC}/g" $TARGET + sed -i "s/NTHREAD_PER_CPROC/${NUM_THREAD_PER_CLIENT_PROC}/g" $TARGET + sed -i "s/PROJNAME/${PROJECT_NAME}/g" $TARGET + sed -i "s/USING_DART/${j}/g" $TARGET + sed -i "s/QUERY_TYPE/${q}/g" $TARGET + sed -i "s/COMMUNICATION_TYPE/${c}/g" $TARGET + if [[ "$i" -gt "4" ]]; then + sed -i "s/REG//g" $TARGET + else + sed -i "s/DBG//g" $TARGET + fi + done + done + done +done diff --git a/scripts/kvtag_affix_query_scale/submit.sh b/scripts/kvtag_affix_query_scale/submit.sh new file mode 100755 index 000000000..10d664bc7 --- /dev/null +++ b/scripts/kvtag_affix_query_scale/submit.sh 
@@ -0,0 +1,73 @@ +#!/bin/bash + +MIN_PROC=16 +MAX_PROC=128 + +PROG_BASENAME=kvafxqry + +curdir=$(pwd) + +first_submit=1 + +if [[ "$#" -ne 3 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# test if $1 is a single digit number between 0 and 1 (inclusive) +re='^[0-1]$' +if ! [[ "$1" =~ $re ]]; then + echo "Error: i_type should be a single digit number between 0 and 1 (inclusive), 0 means not using index, 1 means using index" + exit 1 +fi + +# test if $2 is a single digit number between 0 and 3 (inclusive) +re='^[0-3]$' +if ! [[ "$2" =~ $re ]]; then + echo "Error: q_type should be a single digit number between 0 and 3 (inclusive), 0: exact query, 1: prefix query, 2: suffix query, 3: infix query" + exit 1 +fi + +# test if $3 is a single digit number between 0 and 1 (inclusive) +re='^[0-1]$' +if ! [[ "$3" =~ $re ]]; then + echo "Error: c_type should be a single digit number between 0 and 1 (inclusive), 0 means using non-collective mode, 1 means using collective mode" + exit 1 +fi + + +i_type=$1 +q_type=$2 +c_type=$3 + +for (( i = $MIN_PROC; i <= $MAX_PROC; i*=2 )); do + + cd $curdir/$i + + JOBNAME=${PROG_BASENAME}_${i}_${i_type}_${q_type}_${c_type} + TARGET=$JOBNAME.sbatch + + njob=`squeue -u $USER | grep ${PROG_BASENAME} | wc -l` + echo $njob + while [ $njob -ge 16 ] + do + sleeptime=$[ ( $RANDOM % 5 ) ] + sleep $sleeptime + njob=`squeue -u $USER | grep ${PROG_BASENAME} | wc -l` + echo $njob + done + + if [[ $first_submit == 1 ]]; then + # Submit first job w/o dependency + echo "Submitting $TARGET" + job=`sbatch $TARGET` + first_submit=0 + else + echo "Submitting $TARGET after ${job: -8}" + job=`sbatch -d afterany:${job: -8} $TARGET` + fi + + sleeptime=$[ ( $RANDOM % 5 ) ] + sleep $sleeptime + +done diff --git a/scripts/kvtag_query_scale_mpi/template.sh b/scripts/kvtag_affix_query_scale/template.sh similarity index 91% rename from scripts/kvtag_query_scale_mpi/template.sh rename to scripts/kvtag_affix_query_scale/template.sh index a6b57511d..ceb6c8cc4 100755 --- 
a/scripts/kvtag_query_scale_mpi/template.sh +++ b/scripts/kvtag_affix_query_scale/template.sh @@ -3,7 +3,7 @@ #REGSBATCH -q regular #DBGSBATCH -q debug #SBATCH -N NODENUM -#REGSBATCH -t 2:00:00 +#REGSBATCH -t 3:00:00 #DBGSBATCH -t 0:30:00 #SBATCH -C cpu #SBATCH -J JOBNAME @@ -64,7 +64,7 @@ mkdir -p $PDC_TMPDIR EXECPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin TOOLPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin SERVER=$EXECPATH/pdc_server.exe -CLIENT=$TOOLPATH/kvtag_query_scale_col +CLIENT=$TOOLPATH/kvtag_affix_query_scale CLOSE=$EXECPATH/close_server chmod +x $EXECPATH/* @@ -103,6 +103,14 @@ echo "" echo "=================" echo "$i Closing server" echo "=================" -stdbuf -i0 -o0 -e0 srun -N 1 -n 1 -c 2 --mem=25600 --cpu_bind=cores $CLOSE +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c 2 --mem=25600 --cpu_bind=cores $CLOSE + + +echo "" +echo "=============" +echo "$i restart server" +echo "=============" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c $NUM_THREAD_PER_SERVER_PROC --cpu_bind=cores $SERVER restart & +sleep 5 date diff --git a/scripts/kvtag_range_query_scale/clean.sh b/scripts/kvtag_range_query_scale/clean.sh new file mode 100755 index 000000000..0108aceb6 --- /dev/null +++ b/scripts/kvtag_range_query_scale/clean.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [[ "$#" -ne 1 ]]; then + echo "Usage: $0 <clean_dir>" + exit 1 +fi + +# reject $1 unless it is a single digit number between 0 and 1 (inclusive) +if ! [[ "$1" =~ ^[0-1]$ ]]; then + echo "Error: clean_dir should be a single digit number between 0 and 1 (inclusive). 1 means clean the directories named with numbers, 0 means clean the sbatch script only." 
+ exit 1 +fi + +CLEAN_DIR=$1 +MAX_NODE=512 + +# if CLEAN_DIR is set to '1', then clean all the directories named with numbers, otherwise, clean the sbatch script only +find ./ -name "*.sbatch*" -delete + +if [[ "$CLEAN_DIR" -eq "1" ]]; then + for (( i = 1; i <= $MAX_NODE; i*=2 )); do + + rm -rf $i/* + + done +fi \ No newline at end of file diff --git a/scripts/kvtag_range_query_scale/gen_script.sh b/scripts/kvtag_range_query_scale/gen_script.sh new file mode 100755 index 000000000..e020dba62 --- /dev/null +++ b/scripts/kvtag_range_query_scale/gen_script.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +if [[ "$#" -ne 2 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# test if $1 is a number +re='^[0-9]+$' +if ! [[ $1 =~ $re ]] ; then + echo "Error: min_node is not a number" >&2; exit 1 +fi + +if [[ "$1" -lt "1" ]]; then + echo "Error: min_node should be larger than 0" + exit 1 +fi + +if [[ "$1" -gt "512" ]]; then + echo "Error: min_node should be smaller than 512" + exit 1 +fi + +MIN_NODE=$1 +MAX_NODE=512 +MAX_ATTR=1024 +MAX_ATTRLEN=1000 + + + +# Per node configuration of your HPC system. +MAX_PYSICAL_CORE=128 +MAX_HYPERTHREADING=2 + +# Designated number of threads per process on each node +# (this should be associated with -c option in srun) +NUM_THREAD_PER_SERVER_PROC=2 +NUM_THREAD_PER_CLIENT_PROC=2 + +# test if $2 is a string starting with a letter, and followed by a 4-digit number +re='^[a-zA-Z][0-9]{4}$' +if ! [[ $2 =~ $re ]] ; then + echo "Error: proj_name should be a string starting with a letter, and followed by a 4-digit number, e.g. 
m2021" >&2; exit 1 +fi + +PROJECT_NAME=$2 + + +# Designated number of processes for server anc client on each node +# (this should be associated with -n option in srun) +TOTAL_NUM_CLIENT_PROC=$((128 * MIN_NODE)) +NUM_SERVER_PROC_PER_NODE=1 +NUM_CLIENT_PROC_PER_NODE=$((TOTAL_NUM_CLIENT_PROC)) + + +PROG_BASENAME=kvrngqry + +for (( i = $MIN_NODE; i <= $MAX_NODE; i*=2 )); do + mkdir -p $i + NUM_CLIENT_PROC_PER_NODE=$((TOTAL_NUM_CLIENT_PROC/i)) + AVAIL_CLIENT_THREAD_CORES=$((MAX_PYSICAL_CORE * MAX_HYPERTHREADING - NUM_SERVER_PROC_PER_NODE * NUM_THREAD_PER_SERVER_PROC)) + AVAIL_CLIENT_PHYSICAL_CORES=$((AVAIL_CLIENT_THREAD_CORES / NUM_THREAD_PER_CLIENT_PROC)) + if [[ $(( NUM_CLIENT_PROC_PER_NODE > AVAIL_CLIENT_PHYSICAL_CORES )) -eq 1 ]]; then + NUM_CLIENT_PROC_PER_NODE=$((AVAIL_CLIENT_PHYSICAL_CORES - 2)) + fi + for (( j = 0; j <= 1; j+=1 )); do + for (( q = 0; q < 4; q+=1 )); do + for (( c = 0; c < 2; c+=1 )); do + JOBNAME=${PROG_BASENAME}_${i}_${j}_${q}_${c} + TARGET=./$i/$JOBNAME.sbatch + cp template.sh $TARGET + sed -i "s/JOBNAME/${JOBNAME}/g" $TARGET + sed -i "s/NODENUM/${i}/g" $TARGET + sed -i "s/MPHYSICALCORE/${MAX_PYSICAL_CORE}/g" $TARGET + sed -i "s/MHYPERTHREADING/${MAX_HYPERTHREADING}/g" $TARGET + sed -i "s/N_SERVER_PROC/${NUM_SERVER_PROC_PER_NODE}/g" $TARGET + sed -i "s/N_CLIENT_PROC/${NUM_CLIENT_PROC_PER_NODE}/g" $TARGET + sed -i "s/NTHREAD_PER_SPROC/${NUM_THREAD_PER_SERVER_PROC}/g" $TARGET + sed -i "s/NTHREAD_PER_CPROC/${NUM_THREAD_PER_CLIENT_PROC}/g" $TARGET + sed -i "s/PROJNAME/${PROJECT_NAME}/g" $TARGET + sed -i "s/USING_DART/${j}/g" $TARGET + sed -i "s/QUERY_TYPE/${q}/g" $TARGET + sed -i "s/COMMUNICATION_TYPE/${c}/g" $TARGET + if [[ "$i" -gt "4" ]]; then + sed -i "s/REG//g" $TARGET + else + sed -i "s/DBG//g" $TARGET + fi + done + done + done +done diff --git a/scripts/kvtag_range_query_scale/submit.sh b/scripts/kvtag_range_query_scale/submit.sh new file mode 100755 index 000000000..d50cbcfeb --- /dev/null +++ b/scripts/kvtag_range_query_scale/submit.sh 
@@ -0,0 +1,73 @@ +#!/bin/bash + +MIN_PROC=16 +MAX_PROC=128 + +PROG_BASENAME=kvrngqry + +curdir=$(pwd) + +first_submit=1 + +if [[ "$#" -ne 3 ]]; then + echo "Usage: $0 " + exit 1 +fi + +# test if $1 is a single digit number between 0 and 1 (inclusive) +re='^[0-1]$' +if ! [[ "$1" =~ $re ]]; then + echo "Error: i_type should be a single digit number between 0 and 1 (inclusive), 0 means not using index, 1 means using index" + exit 1 +fi + +# test if $2 is a single digit number between 0 and 3 (inclusive) +re='^[0-3]$' +if ! [[ "$2" =~ $re ]]; then + echo "Error: q_type should be a single digit number between 0 and 3 (inclusive), 0: exact query, 1: prefix query, 2: suffix query, 3: infix query" + exit 1 +fi + +# test if $3 is a single digit number between 0 and 1 (inclusive) +re='^[0-1]$' +if ! [[ "$3" =~ $re ]]; then + echo "Error: c_type should be a single digit number between 0 and 1 (inclusive), 0 means using non-collective mode, 1 means using collective mode" + exit 1 +fi + + +i_type=$1 +q_type=$2 +c_type=$3 + +for (( i = $MIN_PROC; i <= $MAX_PROC; i*=2 )); do + + cd $curdir/$i + + JOBNAME=${PROG_BASENAME}_${i}_${i_type}_${q_type}_${c_type} + TARGET=$JOBNAME.sbatch + + njob=`squeue -u $USER | grep ${PROG_BASENAME} | wc -l` + echo $njob + while [ $njob -ge 16 ] + do + sleeptime=$[ ( $RANDOM % 5 ) ] + sleep $sleeptime + njob=`squeue -u $USER | grep ${PROG_BASENAME} | wc -l` + echo $njob + done + + if [[ $first_submit == 1 ]]; then + # Submit first job w/o dependency + echo "Submitting $TARGET" + job=`sbatch $TARGET` + first_submit=0 + else + echo "Submitting $TARGET after ${job: -8}" + job=`sbatch -d afterany:${job: -8} $TARGET` + fi + + sleeptime=$[ ( $RANDOM % 5 ) ] + sleep $sleeptime + +done diff --git a/scripts/kvtag_range_query_scale/template.sh b/scripts/kvtag_range_query_scale/template.sh new file mode 100755 index 000000000..885aae09f --- /dev/null +++ b/scripts/kvtag_range_query_scale/template.sh @@ -0,0 +1,116 @@ +#!/bin/bash -l + +#REGSBATCH -q regular 
+#DBGSBATCH -q debug +#SBATCH -N NODENUM +#REGSBATCH -t 3:00:00 +#DBGSBATCH -t 0:30:00 +#SBATCH -C cpu +#SBATCH -J JOBNAME +#SBATCH -A PROJNAME +#SBATCH -o o%j.JOBNAME.out +#SBATCH -e o%j.JOBNAME.out + +# export PDC_DEBUG=0 + +# This is a script for running PDC in shared mode on Perlmutter +# When running in Shared mode, the client processes and server processes are running on the same node. +# By alternating the number of server processes and the number client processes, you should be able to change the C/S ratio. +# You can simply set the number of server processes, and let the script to calculate the number of client processes. + +# Per node configuration of your HPC system. +MAX_PYSICAL_CORE=MPHYSICALCORE +MAX_HYPERTHREADING=MHYPERTHREADING + +# Designated number of threads per process on each node +# (this should be associated with -c option in srun) +NUM_THREAD_PER_SERVER_PROC=NTHREAD_PER_SPROC +NUM_THREAD_PER_CLIENT_PROC=NTHREAD_PER_CPROC + + +# Designated number of processes for server anc client on each node +# (this should be associated with -n option in srun) +NUM_SERVER_PROC_PER_NODE=N_SERVER_PROC +NUM_CLIENT_PROC_PER_NODE=N_CLIENT_PROC + +# test if the number of threads is no larger than the total number of logical cores +TOTAL_NUM_PROC_PER_NODE=$((NUM_THREAD_PER_SERVER_PROC * NUM_SERVER_PROC_PER_NODE + NUM_THREAD_PER_CLIENT_PROC * NUM_CLIENT_PROC_PER_NODE)) +TOTAL_NUM_LOGICAL_CORE_PER_NODE=$((MAX_PYSICAL_CORE * MAX_HYPERTHREADING)) +if [[ "$TOTAL_NUM_PROC_PER_NODE" -gt "$TOTAL_NUM_LOGICAL_CORE_PER_NODE" ]]; then + echo "Error: TOTAL_NUM_PROC_PER_NODE is larger than TOTAL_NUM_LOGICAL_CORE_PER_NODE" + TOTAL_AVAILABLE_CORE=$((TOTAL_NUM_LOGICAL_CORE_PER_NODE - NUM_THREAD_PER_SERVER_PROC * NUM_SERVER_PROC_PER_NODE)) + NUM_CLIENT_PROC_PER_NODE=$(( TOTAL_AVAILABLE_CORE / NUM_THREAD_PER_CLIENT_PROC)) + echo "fixing the number of client processes to $NUM_CLIENT_PROC_PER_NODE" +fi + +# Set the number of times the test should be repeated. 
+REPEAT=1 + +# calculate the number of total processes for both server side and client side. +N_NODE=NODENUM +NCLIENT=$((NUM_CLIENT_PROC_PER_NODE * N_NODE)) +NSERVER=$((NUM_SERVER_PROC_PER_NODE * N_NODE)) + +USE_DART=USING_DART +Q_TYPE=QUERY_TYPE +COM_TYPE=COMMUNICATION_TYPE + +# clean up the PDC tmp directory +export PDC_TMPDIR=$SCRATCH/data/pdc/conf +export PDC_TMPDIR=${PDC_TMPDIR}/$N_NODE/$USE_DART/$Q_TYPE/$COM_TYPE +rm -rf $PDC_TMPDIR/* +mkdir -p $PDC_TMPDIR + +EXECPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin +TOOLPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin +SERVER=$EXECPATH/pdc_server.exe +CLIENT=$TOOLPATH/kvtag_range_query_scale +CLOSE=$EXECPATH/close_server + +chmod +x $EXECPATH/* +chmod +x $TOOLPATH/* + +date + +# OpenMP settings: +# set the OPENMP thread number to the smaller number between $NUM_THREAD_PER_SERVER_PROC and $NUM_THREAD_PER_CLIENT_PROC +export OMP_NUM_THREADS=$((NUM_THREAD_PER_SERVER_PROC < NUM_THREAD_PER_CLIENT_PROC ? 
NUM_THREAD_PER_SERVER_PROC : NUM_THREAD_PER_CLIENT_PROC)) +export OMP_PLACES=threads +export OMP_PROC_BIND=close + +echo "OMP_NUM_THREADS=$OMP_NUM_THREADS" +echo "NSERVER=$NSERVER" +echo "NUM_THREAD_PER_SERVER_PROC=$NUM_THREAD_PER_SERVER_PROC" +echo "NCLIENT=$NCLIENT" +echo "NUM_THREAD_PER_CLIENT_PROC=$NUM_THREAD_PER_CLIENT_PROC" + +export ATP_ENABLED=1 + +echo "" +echo "=============" +echo "$i Init server" +echo "=============" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c $NUM_THREAD_PER_SERVER_PROC --cpu_bind=cores $SERVER & +sleep 5 + + +echo "============================================" +echo "KVTAGS with $N_NODE nodes" +echo "============================================" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NCLIENT -c $NUM_THREAD_PER_CLIENT_PROC --cpu_bind=cores $CLIENT 1000000 100 10 $USE_DART $Q_TYPE $COM_TYPE + +echo "" +echo "=================" +echo "$i Closing server" +echo "=================" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c 2 --mem=25600 --cpu_bind=cores $CLOSE + + +echo "" +echo "=============" +echo "$i restart server" +echo "=============" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c $NUM_THREAD_PER_SERVER_PROC --cpu_bind=cores $SERVER restart & +sleep 5 + +date diff --git a/scripts/llsm_idioms_bench/clean.sh b/scripts/llsm_idioms_bench/clean.sh new file mode 100755 index 000000000..0108aceb6 --- /dev/null +++ b/scripts/llsm_idioms_bench/clean.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +if [[ "$#" -ne 1 ]]; then + echo "Usage: $0 <clean_dir>" + exit 1 +fi + +# reject $1 unless it is a single digit number between 0 and 1 (inclusive) +if ! [[ "$1" =~ ^[0-1]$ ]]; then + echo "Error: clean_dir should be a single digit number between 0 and 1 (inclusive). 1 means clean the directories named with numbers, 0 means clean the sbatch script only." 
+ exit 1 +fi + +CLEAN_DIR=$1 +MAX_NODE=512 + +# if CLEAN_DIR is set to '1', then clean all the directories named with numbers, otherwise, clean the sbatch script only +find ./ -name "*.sbatch*" -delete + +if [[ "$CLEAN_DIR" -eq "1" ]]; then + for (( i = 1; i <= $MAX_NODE; i*=2 )); do + + rm -rf $i/* + + done +fi \ No newline at end of file diff --git a/scripts/kvtag_query_scale_mpi/gen_script.sh b/scripts/llsm_idioms_bench/gen_script.sh similarity index 99% rename from scripts/kvtag_query_scale_mpi/gen_script.sh rename to scripts/llsm_idioms_bench/gen_script.sh index 3a1765c02..8e77aa3cb 100755 --- a/scripts/kvtag_query_scale_mpi/gen_script.sh +++ b/scripts/llsm_idioms_bench/gen_script.sh @@ -53,7 +53,7 @@ NUM_SERVER_PROC_PER_NODE=1 NUM_CLIENT_PROC_PER_NODE=$((TOTAL_NUM_CLIENT_PROC)) -PROG_BASENAME=kvqry +PROG_BASENAME=idioms for (( i = $MIN_NODE; i <= $MAX_NODE; i*=2 )); do mkdir -p $i diff --git a/scripts/kvtag_query_scale_mpi/submit.sh b/scripts/llsm_idioms_bench/submit.sh similarity index 98% rename from scripts/kvtag_query_scale_mpi/submit.sh rename to scripts/llsm_idioms_bench/submit.sh index 978110349..b394980c7 100755 --- a/scripts/kvtag_query_scale_mpi/submit.sh +++ b/scripts/llsm_idioms_bench/submit.sh @@ -3,7 +3,7 @@ MIN_PROC=16 MAX_PROC=128 -PROG_BASENAME=kvqry +PROG_BASENAME=idioms curdir=$(pwd) diff --git a/scripts/llsm_idioms_bench/template.sh b/scripts/llsm_idioms_bench/template.sh new file mode 100755 index 000000000..a0d997454 --- /dev/null +++ b/scripts/llsm_idioms_bench/template.sh @@ -0,0 +1,123 @@ +#!/bin/bash -l + +#REGSBATCH -q regular +#DBGSBATCH -q debug +#SBATCH -N NODENUM +#REGSBATCH -t 3:00:00 +#DBGSBATCH -t 0:30:00 +#SBATCH -C cpu +#SBATCH -J JOBNAME +#SBATCH -A PROJNAME +#SBATCH -o o%j.JOBNAME.out +#SBATCH -e o%j.JOBNAME.out + +# export PDC_DEBUG=0 + +# This is a script for running PDC in shared mode on Perlmutter +# When running in Shared mode, the client processes and server processes are running on the same node. 
+# By alternating the number of server processes and the number client processes, you should be able to change the C/S ratio. +# You can simply set the number of server processes, and let the script to calculate the number of client processes. + +# Per node configuration of your HPC system. +MAX_PYSICAL_CORE=MPHYSICALCORE +MAX_HYPERTHREADING=MHYPERTHREADING + +# Designated number of threads per process on each node +# (this should be associated with -c option in srun) +NUM_THREAD_PER_SERVER_PROC=NTHREAD_PER_SPROC +NUM_THREAD_PER_CLIENT_PROC=NTHREAD_PER_CPROC + + +# Designated number of processes for server anc client on each node +# (this should be associated with -n option in srun) +NUM_SERVER_PROC_PER_NODE=N_SERVER_PROC +NUM_CLIENT_PROC_PER_NODE=N_CLIENT_PROC + +# test if the number of threads is no larger than the total number of logical cores +TOTAL_NUM_PROC_PER_NODE=$((NUM_THREAD_PER_SERVER_PROC * NUM_SERVER_PROC_PER_NODE + NUM_THREAD_PER_CLIENT_PROC * NUM_CLIENT_PROC_PER_NODE)) +TOTAL_NUM_LOGICAL_CORE_PER_NODE=$((MAX_PYSICAL_CORE * MAX_HYPERTHREADING)) +if [[ "$TOTAL_NUM_PROC_PER_NODE" -gt "$TOTAL_NUM_LOGICAL_CORE_PER_NODE" ]]; then + echo "Error: TOTAL_NUM_PROC_PER_NODE is larger than TOTAL_NUM_LOGICAL_CORE_PER_NODE" + TOTAL_AVAILABLE_CORE=$((TOTAL_NUM_LOGICAL_CORE_PER_NODE - NUM_THREAD_PER_SERVER_PROC * NUM_SERVER_PROC_PER_NODE)) + NUM_CLIENT_PROC_PER_NODE=$(( TOTAL_AVAILABLE_CORE / NUM_THREAD_PER_CLIENT_PROC)) + echo "fixing the number of client processes to $NUM_CLIENT_PROC_PER_NODE" +fi + +# Set the number of times the test should be repeated. +REPEAT=1 + +# calculate the number of total processes for both server side and client side. 
+N_NODE=NODENUM +NCLIENT=$((NUM_CLIENT_PROC_PER_NODE * N_NODE)) +NSERVER=$((NUM_SERVER_PROC_PER_NODE * N_NODE)) + +USE_DART=USING_DART +Q_TYPE=QUERY_TYPE +COM_TYPE=COMMUNICATION_TYPE + +# clean up the PDC tmp directory +export PDC_TMPDIR=$SCRATCH/data/pdc/conf +export PDC_TMPDIR=${PDC_TMPDIR}/$N_NODE/$USE_DART/$Q_TYPE/$COM_TYPE +rm -rf $PDC_TMPDIR/* +mkdir -p $PDC_TMPDIR + +EXECPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin +TOOLPATH=/global/cfs/cdirs/m2621/wzhang5/perlmutter/install/pdc/share/test/bin +SERVER=$EXECPATH/pdc_server.exe +CLIENT=$TOOLPATH/kvtag_query_scale_col +CLOSE=$EXECPATH/close_server + +chmod +x $EXECPATH/* +chmod +x $TOOLPATH/* + +CSV_FILE=$SCRATCH/data/llsm/metadata/llsm_metadata.csv + + +date + +# OpenMP settings: +# set the OPENMP thread number to the smaller number between $NUM_THREAD_PER_SERVER_PROC and $NUM_THREAD_PER_CLIENT_PROC +export OMP_NUM_THREADS=$((NUM_THREAD_PER_SERVER_PROC < NUM_THREAD_PER_CLIENT_PROC ? NUM_THREAD_PER_SERVER_PROC : NUM_THREAD_PER_CLIENT_PROC)) +export OMP_PLACES=threads +export OMP_PROC_BIND=close + +echo "OMP_NUM_THREADS=$OMP_NUM_THREADS" +echo "NSERVER=$NSERVER" +echo "NUM_THREAD_PER_SERVER_PROC=$NUM_THREAD_PER_SERVER_PROC" +echo "NCLIENT=$NCLIENT" +echo "NUM_THREAD_PER_CLIENT_PROC=$NUM_THREAD_PER_CLIENT_PROC" + +export ATP_ENABLED=1 + +TOTAL_OBJ=1000000 +ROUND=100 +EXPAND_FACTOR=1000 + +echo "" +echo "=============" +echo "$i Init server" +echo "=============" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c $NUM_THREAD_PER_SERVER_PROC --cpu_bind=cores $SERVER & +sleep 5 + + +echo "============================================" +echo "KVTAGS with $N_NODE nodes" +echo "============================================" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NCLIENT -c $NUM_THREAD_PER_CLIENT_PROC --cpu_bind=cores $CLIENT $TOTAL_OBJ $ROUND $EXPAND_FACTOR $USE_DART $Q_TYPE $COM_TYPE $CSV_FILE + +echo "" +echo "=================" +echo "$i Closing server" +echo "=================" +stdbuf 
-i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c 2 --mem=25600 --cpu_bind=cores $CLOSE + + +echo "" +echo "=============" +echo "$i restart server" +echo "=============" +stdbuf -i0 -o0 -e0 srun -N $N_NODE -n $NSERVER -c $NUM_THREAD_PER_SERVER_PROC --cpu_bind=cores $SERVER restart & +sleep 5 + +date diff --git a/src/api/include/pdc.h b/src/api/include/pdc.h index 59389c971..3503df7e3 100644 --- a/src/api/include/pdc.h +++ b/src/api/include/pdc.h @@ -50,7 +50,7 @@ int PDC_timing_report(const char *prefix); * * \param pdc_name [IN] Name of the PDC * - * \return PDC id on success/Zero on failure + * \return PDC id on success / -1 on failure */ pdcid_t PDCinit(const char *pdc_name); diff --git a/src/api/include/pdc_client_connect.h b/src/api/include/pdc_client_connect.h index acf8fa7f0..40b964a49 100644 --- a/src/api/include/pdc_client_connect.h +++ b/src/api/include/pdc_client_connect.h @@ -193,6 +193,8 @@ struct _dart_perform_one_thread_param { #define PDC_CLIENT_DATA_SERVER() ((pdc_client_mpi_rank_g / pdc_nclient_per_server_g) % pdc_server_num_g) +// PDC_pe_info_t *PDC_get_pe_info(); + uint32_t PDC_get_client_data_server(); /***************************************/ @@ -1033,7 +1035,7 @@ DART *get_dart_g(); * * */ -dart_server dart_retrieve_server_info_cb(uint32_t serverId); +void dart_retrieve_server_info_cb(dart_server *target_server); /** * Search through dart index with key-value pair. @@ -1075,12 +1077,15 @@ perr_t PDC_Client_search_obj_ref_through_dart_mpi(dart_hash_algo_t hash_algo, ch * \param hash_algo [IN] name of the hashing algorithm * \param attr_key [IN] Name of the attribute * \param attr_value [IN] Value of the attribute + * \param attr_vsize [IN] Size of the attribute value + * \param attr_vtype [IN] Type of the attribute value * \param ref_type [IN] The reference type of the object, e.g. PRIMARY_ID, SECONDARY_ID, SERVER_ID * \param data [IN] Associated value along with the key-value pair. 
* * \return Non-negative on success/Negative on failure */ -perr_t PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, char *attr_val, +perr_t PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, void *attr_val, + size_t attr_vsize, pdc_c_var_type_t attr_vtype, dart_object_ref_type_t ref_type, uint64_t data); /** @@ -1089,12 +1094,15 @@ perr_t PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *att * \param hash_algo [IN] name of the hashing algorithm * \param attr_key [IN] Name of the attribute * \param attr_value [IN] Value of the attribute + * \param attr_vsize [IN] Size of the attribute value + * \param attr_vtype [IN] Type of the attribute value * \param ref_type [IN] The reference type of the object, e.g. PRIMARY_ID, SECONDARY_ID, SERVER_ID * \param data [IN] Associated value along with the key-value pair. * * \return Non-negative on success/Negative on failure */ -perr_t PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, char *attr_val, +perr_t PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, void *attr_val, + size_t attr_vsize, pdc_c_var_type_t attr_vtype, dart_object_ref_type_t ref_type, uint64_t data); /** @@ -1102,4 +1110,6 @@ perr_t PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *att */ void report_avg_server_profiling_rst(); +int get_dart_insert_count(); + #endif /* PDC_CLIENT_CONNECT_H */ diff --git a/src/api/pdc_client_connect.c b/src/api/pdc_client_connect.c index b89006293..499946cdf 100644 --- a/src/api/pdc_client_connect.c +++ b/src/api/pdc_client_connect.c @@ -70,6 +70,7 @@ int is_client_debug_g = 0; pdc_server_selection_t pdc_server_selection_g = PDC_SERVER_DEFAULT; int pdc_client_mpi_rank_g = 0; int pdc_client_mpi_size_g = 1; +int dart_insert_count = 0; // FIXME: this is a temporary solution for printing out debug info, like memory usage. 
int memory_debug_g = 0; // when it is no longer 0, stop printing debug info. @@ -1492,12 +1493,8 @@ PDC_Client_init() srand(time(NULL)); /* Initialize DART space */ - dart_g = (DART *)calloc(1, sizeof(DART)); - int extra_tree_height = 0; - int replication_factor = pdc_server_num_g / 10; - replication_factor = replication_factor > 0 ? replication_factor : 2; - dart_space_init(dart_g, pdc_client_mpi_size_g, pdc_server_num_g, DART_ALPHABET_SIZE, - extra_tree_height, replication_factor); + dart_g = (DART *)calloc(1, sizeof(DART)); + dart_space_init(dart_g, pdc_server_num_g); server_time_total_g = (int64_t *)calloc(pdc_server_num_g, sizeof(int64_t)); server_call_count_g = (int64_t *)calloc(pdc_server_num_g, sizeof(int64_t)); @@ -8408,45 +8405,41 @@ client_dart_get_server_info_cb(const struct hg_cb_info *callback_info) FUNC_LEAVE(ret_value); } -dart_server -dart_retrieve_server_info_cb(uint32_t serverId) +void +dart_retrieve_server_info_cb(dart_server *server_ptr) { - dart_server ret; - perr_t srv_lookup_rst = PDC_Client_try_lookup_server(serverId, 0); + perr_t srv_lookup_rst = PDC_Client_try_lookup_server(server_ptr->id, 0); if (srv_lookup_rst == FAIL) { - println("the server %d cannot be connected. ", serverId); + println("the server %d cannot be connected. ", server_ptr->id); goto done; } // Mercury comm here. 
hg_handle_t dart_get_server_info_handle; - HG_Create(send_context_g, pdc_server_info_g[serverId].addr, dart_get_server_info_g, + HG_Create(send_context_g, pdc_server_info_g[server_ptr->id].addr, dart_get_server_info_g, &dart_get_server_info_handle); dart_get_server_info_in_t in; - in.serverId = serverId; + in.serverId = server_ptr->id; struct client_genetic_lookup_args lookup_args; hg_return_t hg_ret = HG_Forward(dart_get_server_info_handle, client_dart_get_server_info_cb, &lookup_args, &in); if (hg_ret != HG_SUCCESS) { fprintf(stderr, "dart_get_server_info_g(): Could not start HG_Forward() on serverId = %ld with host = %s\n", - serverId, pdc_server_info_g[serverId].addr_string); + server_ptr->id, pdc_server_info_g[server_ptr->id].addr_string); HG_Destroy(dart_get_server_info_handle); - return ret; + return; } // Wait for response from server hg_atomic_set32(&atomic_work_todo_g, 1); PDC_Client_check_response(&send_context_g); - ret.id = serverId; - ret.indexed_word_count = lookup_args.int64_value1; - ret.request_count = lookup_args.int64_value2; + server_ptr->indexed_word_count = lookup_args.int64_value1; + server_ptr->request_count = lookup_args.int64_value2; done: HG_Destroy(dart_get_server_info_handle); - - return ret; } DART * @@ -8630,6 +8623,12 @@ _aggregate_dart_results_from_all_servers(struct bulk_args_t *lookup_args, Set *o return total_num_results; } +int +get_dart_insert_count() +{ + return dart_insert_count; +} + uint64_t dart_perform_on_servers(index_hash_result_t **hash_result, int num_servers, dart_perform_one_server_in_t *dart_in, Set *output_set) @@ -8641,6 +8640,8 @@ dart_perform_on_servers(index_hash_result_t **hash_result, int num_servers, uint32_t total_n_meta = 0; dart_op_type_t op_type = dart_in->op_type; + dart_in->src_client_id = pdc_client_mpi_rank_g; + FUNC_ENTER(NULL); stopwatch_t timer; @@ -8655,7 +8656,10 @@ dart_perform_on_servers(index_hash_result_t **hash_result, int num_servers, lookup_args[i].op_type = op_type; if 
(is_index_write_op(op_type)) { - dart_in->attr_key = strdup((*hash_result)[i].key); + dart_in->vnode_id = (*hash_result)[i].virtual_node_id; + dart_in->attr_key = strdup((*hash_result)[i].key); + dart_in->inserting_suffix = (*hash_result)[i].is_suffix; + dart_insert_count++; } _dart_send_request_to_one_server(server_id, dart_in, &(lookup_args[i]), &(dart_request_handles[i])); @@ -8688,6 +8692,11 @@ dart_perform_on_servers(index_hash_result_t **hash_result, int num_servers, // } // } } + + // println("[CLIENT %d] (dart_perform_on_servers) %s on %d servers and get %d results, time : " + // "%.4f ms. ", + // pdc_client_mpi_rank_g, is_index_write_op(op_type) ? "write dart index" : "read dart index", + // num_servers, total_n_meta, timer_delta_ms(&timer)); // free(dart_request_handles); done: FUNC_LEAVE(ret_value); @@ -8709,39 +8718,10 @@ PDC_Client_search_obj_ref_through_dart(dart_hash_algo_t hash_algo, char *query_s char * k_query = get_key(query_string, '='); char * v_query = get_value(query_string, '='); char * tok = NULL; - char * affix = NULL; dart_op_type_t dart_op; - pattern_type_t dart_query_type = determine_pattern_type(k_query); - switch (dart_query_type) { - case PATTERN_EXACT: - tok = strdup(k_query); - dart_op = OP_EXACT_QUERY; - break; - case PATTERN_PREFIX: - affix = subrstr(k_query, strlen(k_query) - 1); - tok = strdup(affix); - dart_op = OP_PREFIX_QUERY; - break; - case PATTERN_SUFFIX: - affix = substr(k_query, 1); -#ifndef PDC_DART_SFX_TREE - tok = reverse_str(affix); -#else - tok = strdup(affix); -#endif - dart_op = OP_SUFFIX_QUERY; - break; - case PATTERN_MIDDLE: - // tok = (char *)calloc(strlen(k_query)-2, sizeof(char)); - // strncpy(tok, &k_query[1], strlen(k_query)-2); - affix = substring(k_query, 1, strlen(k_query) - 1); - tok = strdup(affix); - dart_op = OP_INFIX_QUERY; - break; - default: - break; - } + dart_determine_query_token_by_key_query(k_query, &tok, &dart_op); + if (tok == NULL) { printf("==PDC_CLIENT[%d]: Error with tok\n", 
pdc_client_mpi_rank_g); ret_value = FAIL; @@ -8751,10 +8731,12 @@ PDC_Client_search_obj_ref_through_dart(dart_hash_algo_t hash_algo, char *query_s out[0] = NULL; dart_perform_one_server_in_t input_param; - input_param.op_type = dart_query_type; + input_param.op_type = dart_op; input_param.hash_algo = hash_algo; input_param.attr_key = query_string; input_param.attr_val = v_query; + input_param.attr_vsize = strlen(v_query); + input_param.attr_vtype = PDC_STRING; input_param.obj_ref_type = ref_type; // TODO: see if timestamp can help @@ -8800,8 +8782,7 @@ PDC_Client_search_obj_ref_through_dart(dart_hash_algo_t hash_algo, char *query_s // done: free(k_query); free(v_query); - if (affix != NULL) - free(affix); + if (tok != NULL) free(tok); @@ -8815,7 +8796,8 @@ PDC_Client_search_obj_ref_through_dart(dart_hash_algo_t hash_algo, char *query_s } perr_t -PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, char *attr_val, +PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, void *attr_val, + size_t attr_vsize, pdc_c_var_type_t attr_vtype, dart_object_ref_type_t ref_type, uint64_t data) { @@ -8825,6 +8807,8 @@ PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, input_param.hash_algo = hash_algo; input_param.attr_key = attr_key; input_param.attr_val = attr_val; + input_param.attr_vsize = attr_vsize; + input_param.attr_vtype = attr_vtype; input_param.obj_ref_type = ref_type; // FIXME: temporarily ugly implementation here, some assignment can be ignored // and save some bytes for data transfer. 
@@ -8854,7 +8838,8 @@ PDC_Client_delete_obj_ref_from_dart(dart_hash_algo_t hash_algo, char *attr_key, } perr_t -PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, char *attr_val, +PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, void *attr_val, + size_t attr_vsize, pdc_c_var_type_t attr_vtype, dart_object_ref_type_t ref_type, uint64_t data) { // println("input: attr_key = %s, attr_val = %s", attr_key, attr_val); @@ -8864,6 +8849,8 @@ PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, input_param.hash_algo = hash_algo; input_param.attr_key = attr_key; input_param.attr_val = attr_val; + input_param.attr_vsize = attr_vsize; + input_param.attr_vtype = attr_vtype; input_param.obj_ref_type = ref_type; // FIXME: temporarily ugly implementation here, some assignment can be ignored // and save some bytes for data transfer. @@ -8877,6 +8864,7 @@ PDC_Client_insert_obj_ref_into_dart(dart_hash_algo_t hash_algo, char *attr_key, int num_servers = 0; index_hash_result_t *hash_result = NULL; if (hash_algo == DART_HASH) { + // suffix-tree mode switch will be set during this call. 
/**
 * Strip leading and trailing whitespace from a string, in place.
 *
 * Nothing is allocated or shifted: leading whitespace is skipped by advancing
 * the pointer, and trailing whitespace is cut off by writing a NUL terminator
 * right after the last non-whitespace character. The returned pointer
 * therefore aliases the caller's buffer and may point past its start, so keep
 * the original pointer if the buffer has to be freed later.
 *
 * \param str [IN/OUT] Writable NUL-terminated string; may be NULL
 *
 * \return Pointer to the first non-whitespace character inside str (the
 *         terminating NUL if str is all whitespace), or NULL if str is NULL.
 */
char *
trimWhitespace(char *str)
{
    char *end;

    if (str == NULL)
        return NULL;

    // Trim leading space. The cast keeps isspace() defined for negative char values.
    while (isspace((unsigned char)*str))
        str++;

    if (*str == '\0') // All spaces?
        return str;

    // Trim trailing space; "end > str" keeps the scan inside the buffer.
    end = str + strlen(str) - 1;
    while (end > str && isspace((unsigned char)*end))
        end--;

    // Write new null terminator character
    end[1] = '\0';

    return str;
}
/* Fallbacks only; the definitions at the top of this file take precedence. */
#ifndef MAX_CONDITIONS
#define MAX_CONDITIONS 100 // Maximum number of conditions we expect
#endif
#ifndef CONDITION_LENGTH
#define CONDITION_LENGTH 256 // Maximum length of each condition
#endif

/* True for characters that may sit directly next to a logical operator. */
static int
is_condition_boundary(char c)
{
    return c == '\0' || c == '(' || c == ')' || isspace((unsigned char)c);
}

/* Length of the whole-word operator ("AND"/"OR") starting at expr[pos], or 0 if there is none. */
static size_t
logical_operator_at(const char *expr, size_t pos)
{
    static const char *const operators[] = {"AND", "OR"};

    if (pos > 0 && !is_condition_boundary(expr[pos - 1]))
        return 0;
    for (size_t k = 0; k < sizeof(operators) / sizeof(operators[0]); k++) {
        size_t op_len = strlen(operators[k]);
        // strncmp stops at the NUL, and expr[pos + op_len] is only read after a full match.
        if (strncmp(expr + pos, operators[k], op_len) == 0 && is_condition_boundary(expr[pos + op_len]))
            return op_len;
    }
    return 0;
}

/* Copy expr[begin, end) into dst without parentheses or surrounding whitespace; 1 if non-empty. */
static int
store_condition(const char *expr, size_t begin, size_t end, char dst[CONDITION_LENGTH])
{
    size_t out = 0;

    for (size_t i = begin; i < end && out < CONDITION_LENGTH - 1; i++) {
        char c = expr[i];
        if (c == '(' || c == ')')
            continue;
        if (out == 0 && isspace((unsigned char)c))
            continue; // leading whitespace
        dst[out++] = c;
    }
    while (out > 0 && isspace((unsigned char)dst[out - 1]))
        out--; // trailing whitespace
    dst[out] = '\0';
    return out > 0;
}

/**
 * Split a boolean query expression into its individual conditions.
 *
 * The expression is cut at every upper-case AND / OR that appears as a whole
 * word, i.e. delimited by whitespace, a parenthesis or either end of the
 * string. A key or value that merely contains those letters, such as
 * "NAME=ANDROID", stays in one piece. Parentheses are dropped and each
 * condition is trimmed. Empty pieces are skipped.
 *
 * Limits: at most MAX_CONDITIONS conditions are produced, and a condition
 * longer than CONDITION_LENGTH - 1 characters is truncated. Operators inside
 * quoted values are not recognised as part of the value.
 *
 * \param expression [IN]  NUL-terminated query expression; may be NULL
 * \param conditions [OUT] Array of at least MAX_CONDITIONS buffers
 * \param count      [OUT] Number of conditions written; 0 for NULL or empty input
 */
void
splitExpression(const char *expression, char conditions[][CONDITION_LENGTH], int *count)
{
    int    index = 0;
    size_t begin = 0; // start of the condition currently being scanned
    size_t pos   = 0;

    if (count == NULL)
        return;
    *count = 0;
    if (expression == NULL || conditions == NULL)
        return;

    while (index < MAX_CONDITIONS) {
        int    at_end = (expression[pos] == '\0');
        size_t op_len = at_end ? 0 : logical_operator_at(expression, pos);

        if (!at_end && op_len == 0) {
            pos++;
            continue;
        }
        // An operator or the end of the string closes the current condition.
        if (store_condition(expression, begin, pos, conditions[index]))
            index++;
        if (at_end)
            break;
        pos += op_len;
        begin = pos;
    }

    *count = index; // Update the count of extracted conditions
}
+ int send = 1; + // if this is collective mode, each client will send a different condition to a different server + perr_t rst; + if (world_comm != NULL) { + rst = PDC_Client_search_obj_ref_through_dart_mpi(DART_HASH, condition, REF_PRIMARY_ID, &n_res, + &out, world_comm); + } + int n_res; + uint64_t *out; + perr_t rst = + PDC_Client_search_obj_ref_through_dart(DART_HASH, condition, REF_PRIMARY_ID, &n_res, &out); + if (rst != SUCCEED) { + printf("Error with PDC_Client_search_obj_ref_through_dart\n"); + return; + } + (*result)[i] = (separate_query_result_t){n_res, out, condition}; + } +} + +void +query_execution_and_local_merge(char conditions[][CONDITION_LENGTH], int conditionCount, int isCollective, + uint64_t **object_id_list, uint64_t *count) +{ + // step 1: send each condition to a separate server for execution, from a different rank + separate_query_result_t *separate_result; + send_query_condition_get_separate_result(conditions, conditionCount, isCollective, &separate_result); + // step 2: merge the results from all servers + for (int i = 0; i < conditionCount; i++) { + if (separate_result[i].n_res > 0) { + *object_id_list = (uint64_t *)malloc(separate_result[i].n_res * sizeof(uint64_t)); + memcpy(*object_id_list, separate_result[i].out, separate_result[i].n_res * sizeof(uint64_t)); + *count = separate_result[i].n_res; + break; + } + } +} + +size_t +PDC_metadata_multi_condition_query(char *queryString, int isCollective, uint64_t **object_id_list, + uint64_t *count) +{ + char conditions[MAX_CONDITIONS][CONDITION_LENGTH]; + int conditionCount = 0; + + splitExpression(queryString, conditions, &conditionCount); + + // strategy 1: parallel execution of each condition + query_execution_and_local_merge(conditions, conditionCount, isCollective, object_id_list, count); + // strategy 2: query_execution_and_parallel_merge(conditions, conditionCount, isCollective, + // object_id_list, count); + // strategy 3: initial execution to pick the most selective condition, then 
+ // execute the rest in parallel and merge selectivity_based_optimized_execution(conditions, + // conditionCount, object_id_list, count); + // TODO: implement the above strategy + + // For now, we just return a dummy object ID list + *object_id_list = (uint64_t *)malloc(10 * sizeof(uint64_t)); + for (int i = 0; i < 10; i++) { + (*object_id_list)[i] = i; + } + *count = 10; + + return 10; +} \ No newline at end of file diff --git a/src/commons/CMakeLists.txt b/src/commons/CMakeLists.txt index 9a98dcc9f..e26f99643 100644 --- a/src/commons/CMakeLists.txt +++ b/src/commons/CMakeLists.txt @@ -103,14 +103,14 @@ endif() # Collect all source files file(GLOB_RECURSE PDC_COMMONS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.c) file(GLOB_RECURSE PDC_COMMONS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h) -list(FILTER PDC_COMMONS_SOURCES EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") -list(FILTER PDC_COMMONS_HEADERS EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") +# list(FILTER PDC_COMMONS_SOURCES EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") +# list(FILTER PDC_COMMONS_HEADERS EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") list(FILTER PDC_COMMONS_SOURCES EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/.*_test.c") message(STATUS "===PDC_COMMONS_SOURCES: ${PDC_COMMONS_SOURCES}") file(GLOB_RECURSE PDC_COMMONS_TEST_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*_test.c) -list(FILTER PDC_COMMONS_TEST_SRC EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") +# list(FILTER PDC_COMMONS_TEST_SRC EXCLUDE REGEX "${CMAKE_CURRENT_SOURCE_DIR}/serde/.*") message(STATUS "===PDC_COMMONS_TEST_SRC: ${PDC_COMMONS_TEST_SRC}") #------------------------------------------------------------------------------ diff --git a/src/commons/collections/art.c b/src/commons/collections/art.c index 6d15e1da3..d4d774021 100644 --- a/src/commons/collections/art.c +++ b/src/commons/collections/art.c @@ -158,6 +158,9 @@ destroy_node(art_node *n) int art_tree_destroy(art_tree *t) { + if (t == NULL) { + return 0; + } 
#ifndef HL_ATOMIC_DEFS_H
#define HL_ATOMIC_DEFS_H

/*
 * Atomic helpers built on the GCC/Clang __sync builtins, plus mutex and
 * spinlock wrappers that compile to nothing unless THREAD_SAFE is defined.
 *
 * Every statement-like macro is wrapped in do { ... } while (0) so that it
 * behaves as a single statement (safe inside un-braced if/else) and must be
 * followed by a semicolon at the call site.
 */

/* Expression macros: each one yields a value. */
#define ATOMIC_READ(_v)               __sync_fetch_and_add(&(_v), 0)
#define ATOMIC_INCREMENT(_v)          (void)__sync_fetch_and_add(&(_v), 1)
#define ATOMIC_DECREMENT(_v)          (void)__sync_fetch_and_sub(&(_v), 1)
#define ATOMIC_INCREASE(_v, _n)       __sync_add_and_fetch(&(_v), (_n))
#define ATOMIC_DECREASE(_v, _n)       __sync_sub_and_fetch(&(_v), (_n))
#define ATOMIC_CAS(_v, _o, _n)        __sync_bool_compare_and_swap(&(_v), (_o), (_n))
#define ATOMIC_CAS_RETURN(_v, _o, _n) __sync_val_compare_and_swap(&(_v), (_o), (_n))

/* Store _n into _v, retrying the compare-and-swap until it succeeds. */
#define ATOMIC_SET(_v, _n)                                                                                   \
    do {                                                                                                     \
        int _b = 0;                                                                                          \
        do {                                                                                                 \
            _b = ATOMIC_CAS(_v, ATOMIC_READ(_v), _n);                                                        \
        } while (__builtin_expect(!_b, 0));                                                                  \
    } while (0)

/*
 * Store _n into _v for as long as "<current value> _c _n" holds, where _c is
 * a comparison operator token and _t is the type of _v.
 */
#define ATOMIC_SET_IF(_v, _c, _n, _t)                                                                        \
    do {                                                                                                     \
        _t _o = ATOMIC_READ(_v);                                                                             \
        while (__builtin_expect((_o _c(_n)) && !ATOMIC_CAS(_v, _o, _n), 0))                                  \
            _o = ATOMIC_READ(_v);                                                                            \
    } while (0)

#ifdef THREAD_SAFE

/* NOTE(review): the standard feature-test macro is _POSIX_C_SOURCE; confirm this spelling is intended. */
#define __POSIX_C_SOURCE
#include <pthread.h>
#include <stdlib.h> /* abort() */

#ifdef __MACH__
#include <libkern/OSAtomic.h>
#endif

/* Any failure reported by the pthread interface is fatal: abort() is called. */
#define MUTEX_INIT(_mutex)                                                                                   \
    do {                                                                                                     \
        if (__builtin_expect(pthread_mutex_init(&(_mutex), 0) != 0, 0)) {                                    \
            abort();                                                                                         \
        }                                                                                                    \
    } while (0)
#define MUTEX_DESTROY(_mutex) pthread_mutex_destroy(&(_mutex))
#define MUTEX_LOCK(_mutex)                                                                                   \
    do {                                                                                                     \
        if (__builtin_expect(pthread_mutex_lock(&(_mutex)) != 0, 0)) {                                       \
            abort();                                                                                         \
        }                                                                                                    \
    } while (0)
#define MUTEX_UNLOCK(_mutex)                                                                                 \
    do {                                                                                                     \
        if (__builtin_expect(pthread_mutex_unlock(&(_mutex)) != 0, 0)) {                                     \
            abort();                                                                                         \
        }                                                                                                    \
    } while (0)

#ifdef __MACH__
#define SPIN_INIT(_mutex) ((_mutex) = 0)
#define SPIN_DESTROY(_mutex)
#define SPIN_LOCK(_mutex)   OSSpinLockLock(&(_mutex))
#define SPIN_UNLOCK(_mutex) OSSpinLockUnlock(&(_mutex))
#else
#define SPIN_INIT(_mutex)    pthread_spin_init(&(_mutex), 0)
#define SPIN_DESTROY(_mutex) pthread_spin_destroy(&(_mutex))
#define SPIN_LOCK(_mutex)                                                                                    \
    do {                                                                                                     \
        if (__builtin_expect(pthread_spin_lock(&(_mutex)) != 0, 0)) {                                        \
            abort();                                                                                         \
        }                                                                                                    \
    } while (0)
#define SPIN_UNLOCK(_mutex)                                                                                  \
    do {                                                                                                     \
        if (__builtin_expect(pthread_spin_unlock(&(_mutex)) != 0, 0)) {                                      \
            abort();                                                                                         \
        }                                                                                                    \
    } while (0)
#endif

#else /* !THREAD_SAFE: locking compiles away */

#define MUTEX_INIT(_mutex)
#define MUTEX_DESTROY(_mutex)
#define MUTEX_LOCK(_mutex)
#define MUTEX_UNLOCK(_mutex)
#define SPIN_INIT(_mutex)
#define SPIN_DESTROY(_mutex)
#define SPIN_LOCK(_mutex)
#define SPIN_UNLOCK(_mutex)

#endif

#endif // ATOMIC_DEFS_H

// vim: tabstop=4 shiftwidth=4 expandtab:
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/**
 * @brief Key comparator callback.
 * @param k1     The first key to compare
 * @param k1size The size of the first key
 * @param k2     The second key to compare
 * @param k2size The size of the second key
 * @return A negative value if k1 sorts before k2, 0 if they match,
 *         a positive value if k1 sorts after k2.
 * @note Only the sign of the result is meaningful. The typed comparators
 *       below return exactly -1, 0 or 1. They do not return the arithmetic
 *       difference, because a difference truncated to int is wrong for
 *       floating-point keys closer than 1.0, for 64-bit keys, for unsigned
 *       keys, and overflows for distant signed keys.
 */
typedef int (*libhl_cmp_callback_t)(void *k1, size_t k1size, void *k2, size_t k2size);

/*
 * Function body: read *_k1 as _type, convert to int, return the absolute value.
 * NOTE(review): negating INT_MIN and converting an out-of-range floating
 * value to int are both undefined; callers appear to rely on keys being in range.
 */
#define LIBHL_CAST_KEYS(_type, _k1)                                                                          \
    {                                                                                                        \
        _type _k1i = *((_type *)_k1);                                                                        \
        int   rst  = (int)_k1i;                                                                              \
        return rst >= 0 ? rst : -rst;                                                                        \
    }

/*
 * Function body: size-checked three-way comparison of two keys of type _type.
 * If either size is too small or the sizes differ, the sign of the size
 * difference is returned instead of comparing the values.
 */
#define LIBHL_CMP_KEYS_TYPE(_type, _k1, _k1s, _k2, _k2s)                                                     \
    {                                                                                                        \
        if (_k1s < sizeof(_type) || _k2s < sizeof(_type) || _k1s != _k2s)                                    \
            return (_k1s > _k2s) - (_k1s < _k2s);                                                            \
        _type _k1i = *((_type *)_k1);                                                                        \
        _type _k2i = *((_type *)_k2);                                                                        \
        return (_k1i > _k2i) - (_k1i < _k2i);                                                                \
    }

/*
 * Function body: three-way comparison that ignores the key sizes.
 * A NULL key sorts before any non-NULL key; two NULL keys are equal.
 * For floating types a NaN operand compares as equal (both tests are false).
 */
#define LIBHL_CMP_KEYS_NO_TYPE(_type, _k1, _k1s, _k2, _k2s)                                                  \
    {                                                                                                        \
        if (_k1 == NULL && _k2 == NULL)                                                                      \
            return 0;                                                                                        \
        if (_k1 == NULL)                                                                                     \
            return -1;                                                                                       \
        if (_k2 == NULL)                                                                                     \
            return 1;                                                                                        \
        _type _k1i = *((_type *)_k1);                                                                        \
        _type _k2i = *((_type *)_k2);                                                                        \
        return (_k1i > _k2i) - (_k1i < _k2i);                                                                \
    }

/**
 * @brief Absolute value of the first sizeof(int) bytes of the key, read as int.
 * @note NOTE(review): reads sizeof(int) bytes regardless of k1size, so a
 *       shorter key (e.g. a 16-bit one) is read past its end.
 */
static int
libhl_cast_any_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    int rst = *(int *)k1;
    return rst >= 0 ? rst : -rst;
}

/** @brief |(int)key| for an int key; k2 and the sizes are unused. */
static int
libhl_cast_int_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(int, k1);
}

/** @brief |(int)key| for a long key; k2 and the sizes are unused. */
static int
libhl_cast_long_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(long, k1);
}

/** @brief |(int)key| for an int16_t key; k2 and the sizes are unused. */
static int
libhl_cast_int16_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(int16_t, k1);
}

/** @brief |(int)key| for an int32_t key; k2 and the sizes are unused. */
static int
libhl_cast_int32_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(int32_t, k1);
}

/** @brief |(int)key| for an int64_t key; k2 and the sizes are unused. */
static int
libhl_cast_int64_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(int64_t, k1);
}

/** @brief |(int)key| for a float key (fraction discarded); k2 and the sizes are unused. */
static int
libhl_cast_float_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(float, k1);
}

/** @brief |(int)key| for a double key (fraction discarded); k2 and the sizes are unused. */
static int
libhl_cast_double_to_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CAST_KEYS(double, k1);
}

/**
 * @brief NUL-terminated string comparator (strcmp order); sizes are unused.
 *        A NULL key sorts before any non-NULL key, like the typed comparators.
 */
static int
libhl_cmp_keys_string(void *k1, size_t k1size, void *k2, size_t k2size)
{
    if (k1 == NULL && k2 == NULL)
        return 0;
    if (k1 == NULL)
        return -1;
    if (k2 == NULL)
        return 1;
    return strcmp((const char *)k1, (const char *)k2);
}

/**
 * @brief int signed integers comparator
 */
static int
libhl_cmp_keys_int(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(int, k1, k1size, k2, k2size);
}

/**
 * @brief long signed integers comparator
 */
static int
libhl_cmp_keys_long(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(long, k1, k1size, k2, k2size);
}

/**
 * @brief 16bit signed integers comparator
 */
static int
libhl_cmp_keys_int16(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(int16_t, k1, k1size, k2, k2size);
}

/**
 * @brief 32bit signed integers comparator
 */
static int
libhl_cmp_keys_int32(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(int32_t, k1, k1size, k2, k2size);
}

/**
 * @brief 64bit signed integers comparator
 */
static int
libhl_cmp_keys_int64(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(int64_t, k1, k1size, k2, k2size);
}

/**
 * @brief 16bit unsigned integers comparator
 */
static int
libhl_cmp_keys_uint16(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(uint16_t, k1, k1size, k2, k2size);
}

/**
 * @brief 32bit unsigned integers comparator
 */
static int
libhl_cmp_keys_uint32(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(uint32_t, k1, k1size, k2, k2size);
}

/**
 * @brief 64bit unsigned integers comparator
 */
static int
libhl_cmp_keys_uint64(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(uint64_t, k1, k1size, k2, k2size);
}

/**
 * @brief float comparator
 */
static int
libhl_cmp_keys_float(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(float, k1, k1size, k2, k2size);
}

/**
 * @brief double comparator
 */
static int
libhl_cmp_keys_double(void *k1, size_t k1size, void *k2, size_t k2size)
{
    LIBHL_CMP_KEYS_NO_TYPE(double, k1, k1size, k2, k2size);
}
libhl_cast_long_to_int, // "PDC_LONG", + NULL, //"PDC_INT8", + NULL, //"PDC_UINT8", + libhl_cast_int16_to_int, // "PDC_INT16", + libhl_cast_any_to_int, // "PDC_UINT16", + libhl_cast_int32_to_int, //"PDC_INT32", + libhl_cast_any_to_int, //"PDC_UINT32", + libhl_cast_int64_to_int, //"PDC_INT64", + libhl_cast_any_to_int, //"PDC_UINT64", + libhl_cast_float_to_int, //"PDC_FLOAT", + libhl_cast_double_to_int, //"PDC_DOUBLE", + NULL, //"PDC_CHAR", + NULL, //"PDC_STRING", + NULL, //"PDC_BOOLEAN", + NULL, //"PDC_VOID_PTR", + NULL, //"PDC_SIZE_T", + NULL, //"PDC_BULKI", + NULL, //"PDC_BULKI_ENT" +}; + +#define LIBHL_CMP_CB(DT) cmp_cb_array[DT] +#define LIBHL_LOCATE_CB(DT) locate_cb_array[DT] + +#define LIBHL_CHOOSE_CMP_CB(DT) \ + libhl_cmp_callback_t cmp_cb; \ + libhl_cmp_callback_t locate_cb; \ + switch (DT) { \ + case INT: \ + cmp_cb = libhl_cmp_keys_int; \ + locate_cb = libhl_cast_int_to_int; \ + break; \ + case LONG: \ + cmp_cb = libhl_cmp_keys_long; \ + locate_cb = libhl_cast_long_to_int; \ + break; \ + case FLOAT: \ + cmp_cb = libhl_cmp_keys_float; \ + locate_cb = libhl_cast_float_to_int; \ + break; \ + case DOUBLE: \ + cmp_cb = libhl_cmp_keys_double; \ + locate_cb = libhl_cast_double_to_int; \ + break; \ + case INT16: \ + cmp_cb = libhl_cmp_keys_int16; \ + locate_cb = libhl_cast_int16_to_int; \ + break; \ + case INT32: \ + cmp_cb = libhl_cmp_keys_int32; \ + locate_cb = libhl_cast_int32_to_int; \ + break; \ + case INT64: \ + cmp_cb = libhl_cmp_keys_int64; \ + locate_cb = libhl_cast_int64_to_int; \ + break; \ + case UINT16: \ + cmp_cb = libhl_cmp_keys_uint16; \ + locate_cb = libhl_cast_any_to_int; \ + break; \ + case UINT32: \ + cmp_cb = libhl_cmp_keys_uint32; \ + locate_cb = libhl_cast_any_to_int; \ + break; \ + case UINT64: \ + cmp_cb = libhl_cmp_keys_uint64; \ + locate_cb = libhl_cast_any_to_int; \ + break; \ + default: \ + cmp_cb = NULL; \ + locate_cb = NULL; \ + break; \ + } + +#ifdef __cplusplus +} +#endif + +#endif + +// vim: tabstop=4 shiftwidth=4 expandtab: +/* 
-*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ diff --git a/src/commons/collections/libhl/include/linklist.h b/src/commons/collections/libhl/include/linklist.h new file mode 100644 index 000000000..bb2c80755 --- /dev/null +++ b/src/commons/collections/libhl/include/linklist.h @@ -0,0 +1,428 @@ +/** + * @file linklist.h + * @author Andrea Guzzo + * @date 22/09/2013 + * @brief Fast thread-safe linklist implementation + * @note In case of failures reported from the pthread interface + * abort() will be called. Callers can catch SIGABRT if more + * actions need to be taken. + */ +#ifndef HL_LINKLIST_H +#define HL_LINKLIST_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#ifdef WIN32 +#ifdef THREAD_SAFE +#include +#endif +#endif +#include // for memset + +/** + * @brief Callback that, if provided, will be called to release the value resources + * when an item is being removed from the list + */ +typedef void (*free_value_callback_t)(void *v); + +typedef int (*list_comparator_callback_t)(void *v1, void *v2); + +/** + * @brief Opaque structure representing the actual linked list descriptor + */ +typedef struct _linked_list_s linked_list_t; + + +/******************************************************************** + * Common API + ********************************************************************/ + +/* List creation and destruction routines */ + +/** + * @brief Create a new list + * @return a newly allocated and initialized list + */ +linked_list_t *list_create(); + +/** + * @brief Initialize a pre-allocated list + * + * This function can be used to initialize a statically defined list + * @return 0 on success; -1 otherwise + */ +int list_init(linked_list_t *list); + +/** + * @brief Release all resources related to the list + * @param list : A valid pointer to a linked_list_t structure + */ +void list_destroy(linked_list_t *list); + +/** + * @brief remove all items from the list + * @param list : A valid pointer 
to a linked_list_t structure + */ +void list_clear(linked_list_t *list); + +/** + * @brief Return the total count of items in the list + * @param list : A valid pointer to a linked_list_t structure + * @return the actual number of items stored in the list + */ +size_t list_count(linked_list_t *list); + +/** + * @brief Set the callback which must be called to release values stored in the list + * @param list : A valid pointer to a linked_list_t structure + * @param free_value_cb : a free_value_callback_t function + */ +void list_set_free_value_callback(linked_list_t *list, free_value_callback_t free_value_cb); + +/** + * @brief Lock the list + * @param list : A valid pointer to a linked_list_t structure + */ +void list_lock(linked_list_t *list); + +/** + * @brief Unlock the list + * @param list : A valid pointer to a linked_list_t structure + */ +void list_unlock(linked_list_t *list); + +/******************************************************************** + * Value-based API + ********************************************************************/ + + +/* List access routines */ + +/** + * @brief Remove last value from the list + * @param list : A valid pointer to a linked_list_t structure + * @return The value previously stored at the tail of the list + */ +void *list_pop_value(linked_list_t *list); + +/** + * @brief Append a new value to the list (tail) + * @param list : A valid pointer to a linked_list_t structure + * @param val : The value to store in the tail of the list + * @return : 0 if success, -1 otherwise + */ +int list_push_value(linked_list_t *list, void *val); + +/** + * @brief Insert a new value at the beginning of the list (head) + * @param list : A valid pointer to a linked_list_t structure + * @param val : The value to store in the head of the list + * @return : 0 if success, -1 otherwise + */ +int list_unshift_value(linked_list_t *list, void *val); + +/** + * @brief Remove the first value from the list + * @param list : A valid pointer to a linked_list_t
structure + * @return The value previously stored at the head of the list + */ + +void *list_shift_value(linked_list_t *list); + +/** + * @brief Insert a value at a specific position + * @param list : A valid pointer to a linked_list_t structure + * @param val : The value to store at pos + * @param pos : The position (offset) where to store the value + * @return 0 if success, -1 otherwise + * + * If the list is shorter than pos-1 empty values will be inserted up to + * that position before inserting the new one + */ +int list_insert_value(linked_list_t *list, void *val, size_t pos); + + +/** + * @brief Set the value at a specific position + * @param list : A valid pointer to a linked_list_t structure + * @param pos : The position (offset) where to store the value + * @param val : The value to store at pos + * + * This function will replace the value at pos if present or insert it if missing + * filling in the gaps with NULL values if the length of the list is shorter than pos + */ +void *list_set_value(linked_list_t *list, size_t pos, void *val); + +/** + * @brief Replace the value stored at a specific position with a new value + * @param list : A valid pointer to a linked_list_t structure + * @param pos : The position of the value we want to replace + * @param val : The new value + */ +void *list_subst_value(linked_list_t *list, size_t pos, void *val); + + +/** + * @brief Pick the value at a specific position + * @param list : A valid pointer to a linked_list_t structure + * @param pos : The position (offset) of the requested value + * @return : The value stored at pos if any, NULL otherwise + * + * Note this is a read-only access and the value will not be removed from the list + */ +void *list_pick_value(linked_list_t *list, size_t pos); + +/** + * @brief Fetch (aka: Pick and Remove) the value at a specific position + * @param list : A valid pointer to a linked_list_t structure + * @param pos : The position (offset) of the requested value + * @return : The value
stored at pos if any, NULL otherwise + * + * Note this is a read-write access and the value will be removed from the list before returning it. + * The value will not be released so the free_value_callback won't be called in this case + */ +void *list_fetch_value(linked_list_t *list, size_t pos); + +/** + * @brief Move an existing value to a new position + * @param list : A valid pointer to a linked_list_t structure + * @param srcPos : The actual position of the value we want to move + * @param dstPos : The new position where to move the value to + * @return : 0 if success, -1 otherwise + */ +int list_move_value(linked_list_t *list, size_t srcPos, size_t dstPos); + +/** + * @brief Swap two values + * @param list : A valid pointer to a linked_list_t structure + * @param pos1 : The position of the first value to swap with a second one + * @param pos2 : The position of the second value to swap with the first + * @return 0 if success, -1 otherwise + */ +int list_swap_values(linked_list_t *list, size_t pos1, size_t pos2); + + +/** + * @brief Callback for the value iterator + * @return 1 to go ahead with the iteration, + * 0 to stop the iteration, + * -1 to remove the current item from the list and go ahead with the iteration + * -2 to remove the current item from the list and stop the iteration + */ +typedef int (*item_handler_t)(void *item, size_t idx, void *user); + +/* list iterator. This iterator can be used for both Tag-based and Value-based lists. + * If tagged, items can simply be casted to a tagged_value_t pointer. 
+ * @return The number of items visited during the iteration + */ +int list_foreach_value(linked_list_t *list, item_handler_t item_handler, void *user); + +/******************************************************************** + * Tag-based API + ********************************************************************/ + +/** + * @brief Tagged Value + * + * This structure represents a tagged_value_t and is the main datatype + * you will have to handle when working with the tag-based API. + * If the user extracts such a structure from the list (removing it from the list) + * then they MUST release its resources through a call to list_destroy_tagged_value() + * when finished using it. + * If a new tagged_value must be created and inserted in a list, then + * list_create_tagged_value() should be used to allocate resources and obtain + * a pointer to a tagged_value_t structure. + */ +typedef struct _tagged_value_s { + char *tag; + void *value; + size_t vlen; + char type; +#define TV_TYPE_STRING 0 +#define TV_TYPE_BINARY 1 +#define TV_TYPE_LIST 2 +} tagged_value_t; + + +/* List creation and destruction routines */ + +/* Tagged List access routines (same as previous but with tag support) */ +/** + * @brief Allocate resources for a new tagged value + * @param tag : The tag + * @param val : The value + * @param len : The size of the value + * @return a newly created tagged value with the provided tag and value + * + * Both the tag and the value will be copied.
len will be the size used by the copy + */ +tagged_value_t *list_create_tagged_value(char *tag, void *val, size_t len); + +/** + * @brief Allocate resources for a new tagged value without copying the value + * @param tag : The tag + * @param val : The value + * @return A newly created tagged value with the provided tag and value + * + * Only the tag will be copied, the value will just point + * to the provided value without it being copied + */ +tagged_value_t *list_create_tagged_value_nocopy(char *tag, void *val); + +/** + * @brief Create a tagged value where the value is a linked_list_t + * @param tag : The tag + * @param list: The list used as value + * @return A newly created tagged value with type TV_TYPE_LIST + * + * This function is just an accessor to set the tagged_value->type properly + * when using it to store a list + */ +tagged_value_t *list_create_tagged_sublist(char *tag, linked_list_t *list); + +/** + * @brief Release resources used by the tagged value tval + * @param tval : The tagged value to release + */ +void list_destroy_tagged_value(tagged_value_t *tval); + +/** + * @brief Same as pop_value but expect the value to be a pointer to a tagged_value_t structure + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @return The tagged value stored at the end of the list + */ +tagged_value_t *list_pop_tagged_value(linked_list_t *list); + +/** + * @brief Same as push_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param tval: The new tagged value to store + * @return 0 if success, -1 otherwise + */ +int list_push_tagged_value(linked_list_t *list, tagged_value_t *tval); + +/** + * @brief Same as unshift_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param tval: The new tagged value to store + * @return 0 if success, -1 otherwise + 
*/ +int list_unshift_tagged_value(linked_list_t *list, tagged_value_t *tval); + +/** + * @brief Same as shift_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @return The tagged value stored in the head of the list, NULL if the list is empty + */ +tagged_value_t *list_shift_tagged_value(linked_list_t *list); + +/** + * @brief Same as insert_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param tval: The new tagged value to store + * @param pos: The position (index) where to store the new tagged value + * @return 0 if success, -1 otherwise + */ +int list_insert_tagged_value(linked_list_t *list, tagged_value_t *tval, size_t pos); + +/** + * @brief Same as pick_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param pos : The position (offset) of the requested tagged value + * @return : The tagged value stored at pos if any, NULL otherwise + * + * Note this is a read-only access and the tagged value will not be removed from the list + */ +tagged_value_t *list_pick_tagged_value(linked_list_t *list, size_t pos); + +/** + * @brief Same as fetch_value but when using the list to store tagged values + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param pos : The position (offset) of the requested tagged value + * @return : The tagged value stored at pos if any, NULL otherwise + * + * Note this is a read-write access and the tagged value will be removed from + * the list before returning it. 
+ * The tagged value will not be released + */ +tagged_value_t *list_fetch_tagged_value(linked_list_t *list, size_t pos); + +/** + * @brief Get a tagged value from the list by using its tag instead of the position + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param tag : The tag of the value we are looking for + * @return The first tagged value in the list whose tag matches the provided tag + * + * Note this is a read-only access and the tagged value will not be removed from the list + */ +tagged_value_t *list_get_tagged_value(linked_list_t *list, char *tag); + +/** + * @brief Set a new tagged value in the list. If the list already + * contains values with the same tag, the first occurrence will be replaced with the new value + * (but still at the same index in the list) + * @param list: A valid pointer to a linked_list_t structure holding tagged values + * @param tag: The tag of the item + * @param value: The value pointer + * @param len: The length of the value (used only when copy is non-zero) + * @param copy: If non-zero the value is copied into the new tagged value, otherwise only the pointer is stored. 
+ * @return The previous tagged_value_t matching the given tag if any; NULL otherwise + * @note If a tagged value with the same tag is already contained in the list, + * this function will replace the old tagged_value_t structure with the + * new one preserving the position in the list.\n + * If no matching tagged_value_t structure is found, then the new one + * is added to the end of the list + */ +tagged_value_t *list_set_tagged_value(linked_list_t *list, char *tag, void *value, size_t len, int copy); + + +/** + * @brief Get all value pointers for all tagged values matching a specific tag + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param tag : The tag of the values we are looking for + * @param values : a valid pointer to a linked_list_t structure where to put the + * value pointers held by the tagged_value_t items matching the provided tag + * @return The number of tagged values matching the tag and added to the values linked list + * + * Note The caller MUST NOT release resources for the returned values + * (since still pointed by the tagged_value_t still in list) + */ +size_t list_get_tagged_values(linked_list_t *list, char *tag, linked_list_t *values); + +/** + * @brief Sort the content of the list using an in-place quicksort algorithm and a + * provided callback able to compare the value stored in the list + * @param list : A valid pointer to a linked_list_t structure holding tagged values + * @param comparator : A valid list_comparator_callback_t callback able to compare the + * actual value stored in the list + */ +void list_sort(linked_list_t *list, list_comparator_callback_t comparator); + + +// size_t get_mem_usage_by_all_linkedlist(); + +/******************************************************************** + * Slice API + ********************************************************************/ + +typedef struct _slice_s slice_t; + +slice_t *slice_create(linked_list_t *list, size_t offset, size_t length); + 
+void slice_destroy(slice_t *slice); + +int slice_foreach_value(slice_t *slice, item_handler_t item_handler, void *user); + +#ifdef __cplusplus +} +#endif + +#endif + +// vim: tabstop=4 shiftwidth=4 expandtab: +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ diff --git a/src/commons/collections/libhl/include/rbtree.h b/src/commons/collections/libhl/include/rbtree.h new file mode 100644 index 000000000..4f3bff79e --- /dev/null +++ b/src/commons/collections/libhl/include/rbtree.h @@ -0,0 +1,286 @@ +/** + * @file rbtree.h + * + * @brief Red/Black Tree + * + * Red/Black Tree implementation to store/access arbitrary data + * + * @todo extend the api to allow walking from/to a specific node + * (instead of only allowing to walk the entire tree) + * + */ + +#ifndef HL_RBTREE_H +#define HL_RBTREE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "comparators.h" +// #include "../../utils/profile/mem_perf.h" + +/** + * @brief Opaque structure representing the tree + */ +typedef struct _rbt_s rbt_t; + +/** + * @brief Callback that, if provided, will be called to release the value + * resources when an item is being overwritten or when removed from + * the tree + * @param v the pointer to free + */ +typedef void (*rbt_free_value_callback_t)(void *v); + +/** + * @brief Create a new red/black tree + * @param cmp_keys_cb The comparator callback to use when comparing + * keys (defaults to memcmp()) + * @param free_value_cb The callback used to release values when a node + * is removed or overwritten + * @return A valid and initialized red/black tree (empty) + */ +rbt_t *rbt_create(libhl_cmp_callback_t cmp_keys_cb, rbt_free_value_callback_t free_value_cb); + +/** + * @brief Create a new red/black tree + * @param dtype The data type of the keys + * @param free_value_cb The callback used to release values when a node + * is removed or overwritten + * @return A valid and initialized red/black tree (empty) + */ +rbt_t 
*rbt_create_by_dtype(pdc_c_var_type_t dtype, rbt_free_value_callback_t free_value_cb); + +/** + * @brief Set the dtype to rbt + * @param rbt A valid pointer to an initialized rbt_t structure + * @param dtype The data type of the keys + * @return + */ +void rbt_set_dtype(rbt_t *rbt, pdc_c_var_type_t dtype); + +/** + * @brief Get the dtype of rbt + * @param rbt A valid pointer to an initialized rbt_t structure + * @return dtype + */ +pdc_c_var_type_t rbt_get_dtype(rbt_t *rbt); + +/** + * @brief Release all the resources used by a red/black tree + * @param rbt A valid pointer to an initialized rbt_t structure + */ +void rbt_destroy(rbt_t *rbt); + +/** + * @brief Add a new value into the tree + * @param rbt A valid pointer to an initialized rbt_t structure + * @param key The key of the node where to store the new value + * @param klen The size of the key + * @param value The new value to store + * @return 0 if a new node has been created successfully; + * 1 if an existing node has been found and the value has been updated; + * -1 otherwise + */ +int rbt_add(rbt_t *rbt, void *key, size_t klen, void *value); + +/** + * @brief Remove a node from the tree + * @param rbt A valid pointer to an initialized rbt_t structure + * @param key The key of the node to remove + * @param klen The size of the key + * @param value If not NULL the address of the value hold by the removed node + * will be stored at the memory pointed by the 'value' argument. 
+ * If NULL and a free_value_callback is set, the value held by + * the removed node will be released using the free_value_callback + * @return 0 on success; -1 otherwise + */ +int rbt_remove(rbt_t *rbt, void *key, size_t klen, void **value); + +/** + * @brief Find the value stored in the node matching a specific key + * (if any) + * @param rbt A valid pointer to an initialized rbt_t structure + * @param key The key of the node to look up + * @param klen The size of the key + * @param value A reference to the pointer which will be set to point to the + * actual value if found + * @return 0 on success and *value is set to point to the stored + * value;\n-1 if not found + */ +int rbt_find(rbt_t *rbt, void *key, size_t klen, void **value); + +typedef enum { + RBT_WALK_STOP = 0, + RBT_WALK_CONTINUE = 1, + RBT_WALK_DELETE_AND_CONTINUE = -1, + RBT_WALK_DELETE_AND_STOP = -2 +} rbt_walk_return_code_t; + +/** + * @brief Callback called for each node when walking the tree + * @param rbt A valid pointer to an initialized rbt_t structure + * @param key The key of the visited node + * @param klen The size of the key + * @param value The value stored in the visited node + * @param priv The private pointer passed to either rbt_walk() + * or rbt_walk_sorted() + * @return + * RBT_WALK_CONTINUE If the walker can go ahead visiting the next node,\n + * RBT_WALK_STOP if the walker should stop and return\n + * RBT_WALK_DELETE_AND_CONTINUE if the current node should be removed and the + * walker can go ahead\n + * RBT_WALK_DELETE_AND_STOP if the current node should be removed and the + * walker should stop + */ +typedef rbt_walk_return_code_t (*rbt_walk_callback)(rbt_t *rbt, void *key, size_t klen, void *value, + void *priv); + +/** + * @brief Walk the entire tree and call the callback for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param cb The callback to call for each visited node + * @param priv A 
pointer to private data provided passed as argument to the + * callback when invoked. + * @return The number of visited nodes + */ +int rbt_walk(rbt_t *rbt, rbt_walk_callback cb, void *priv); + +/** + * @brief Walk the entire tree visiting nodes in ascending order and call the callback + * for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. + * @return The number of visited nodes + */ +int rbt_walk_sorted(rbt_t *rbt, rbt_walk_callback cb, void *priv); + +/** + * @brief Walk the node with its key within the given range in the tree and call the callback for each visited + * node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param begin_key key to begin with + * @param bgk_size size of the key to begin with + * @param end_key key to end with + * @param edk_size size of the key to end with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. 
+ * @param begin_inclusive whether the begin_key is inclusive or not + * @param end_inclusive whether the end_key is inclusive or not + * @return The number of visited nodes + * + */ +int rbt_range_walk(rbt_t *rbt, void *begin_key, size_t bgk_size, void *end_key, size_t edk_size, + rbt_walk_callback cb, void *priv, int begin_inclusive, int end_inclusive); + +/** + * @brief Walk the node with its key within the given range in the tree in ascending order and call the + * callback for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param begin_key key to begin with + * @param bgk_size size of the key to begin with + * @param end_key key to end with + * @param edk_size size of the key to end with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. + * @param begin_inclusive whether the begin_key is inclusive or not + * @param end_inclusive whether the end_key is inclusive or not + * @return The number of visited nodes + */ +int rbt_range_walk_sorted(rbt_t *rbt, void *begin_key, size_t bgk_size, void *end_key, size_t edk_size, + rbt_walk_callback cb, void *priv, int begin_inclusive, int end_inclusive); + +/** + * @brief Walk the node with its key less than or equal to the given key in the tree and call the callback + * + * @param rbt A valid pointer to an initialized rbt_t structure + * @param end_key key to end with + * @param edk_size size of the key to end with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. 
+ * @param end_inclusive whether the end_key is inclusive or not + * @return The number of visited nodes + */ +int rbt_range_lt(rbt_t *rbt, void *end_key, size_t edk_size, rbt_walk_callback cb, void *priv, + int end_inclusive); + +/** + * @brief Walk the node with its key greater than or equal to the given key in the tree and call the callback + * for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param begin_key key to begin with + * @param bgk_size size of the key to begin with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. + * @param begin_inclusive whether the begin_key is inclusive or not + * @return The number of visited nodes + */ +int rbt_range_gt(rbt_t *rbt, void *begin_key, size_t bgk_size, rbt_walk_callback cb, void *priv, + int begin_inclusive); + +/** + * @brief Walk the node with its key less than or equal to the given key in the tree in ascending order and + * call the callback for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param end_key key to end with + * @param edk_size size of the key to end with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. 
+ * @param end_inclusive whether the end_key is inclusive or not + * @return The number of visited nodes + */ +int rbt_range_lt_sorted(rbt_t *rbt, void *end_key, size_t edk_size, rbt_walk_callback cb, void *priv, + int end_inclusive); + +/** + * @brief Walk the node with its key greater than or equal to the given key in the tree in ascending order and + * call the callback for each visited node + * @param rbt A valid pointer to an initialized rbt_t structure + * @param begin_key key to begin with + * @param bgk_size size of the key to begin with + * @param cb The callback to call for each visited node + * @param priv A pointer to private data provided passed as argument to the + * callback when invoked. + * @param begin_inclusive whether the begin_key is inclusive or not + * @return The number of visited nodes + */ +int rbt_range_gt_sorted(rbt_t *rbt, void *begin_key, size_t bgk_size, rbt_walk_callback cb, void *priv, + int begin_inclusive); + +/** + * @brief Return the size of the tree, which is the number of nodes in the tree + * @param rbt A valid pointer to an initialized rbt_t structure + * @return The number of nodes in the tree. 
+ */
+uint64_t rbt_size(rbt_t *rbt);
+
+// perf_info_t *get_perf_info_rbtree(rbt_t *index_root);
+
+// void reset_perf_info_counters_rbtree(rbt_t *rbt);
+
+// size_t get_mem_usage_by_all_rbtrees();
+
+#ifdef DEBUG_RBTREE
+/**
+ * @brief Print out the whole tree on stdout (for debugging purposes only)
+ *
+ * NOTE(review): the parameter type rbtree_t is not declared in this header
+ * (the tree type declared here is rbt_t) -- confirm before building with
+ * DEBUG_RBTREE defined.
+ */
+void rbtree_print(rbtree_t *rbt);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // HL_RBTREE_H
+
+// vim: tabstop=4 shiftwidth=4 expandtab:
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
diff --git a/src/commons/collections/libhl/linklist.c b/src/commons/collections/libhl/linklist.c
new file mode 100644
index 000000000..fd2de72ac
--- /dev/null
+++ b/src/commons/collections/libhl/linklist.c
@@ -0,0 +1,1241 @@
+/* linked list management library - by xant
+ */
+
+/* NOTE(review): the header names of the following #include lines appear to be
+ * missing -- TODO confirm against the real source file. */
+//#include
+#include
+#include
+#include
+
+#include "linklist.h"
+#include "atomic_defs.h"
+#include "pdc_malloc.h"
+
+// size_t mem_usage_by_all_linkedlist;
+
+/* One node of the doubly linked list. */
+typedef struct _list_entry_s {
+    /* list the entry is currently linked in; set by the push/unshift routines
+     * and reset to NULL by the pop/shift/remove routines */
+    struct _linked_list_s *list;
+    struct _list_entry_s * prev;
+    struct _list_entry_s * next;
+    /* user payload; a tagged_value_t pointer when 'tagged' is set */
+    void * value;
+    /* set to 1 by the tagged-value insertion routines; list_clear() uses it to
+     * decide how the value has to be released */
+    int tagged;
+} list_entry_t;
+
+/* Concrete layout of the list handle used throughout this file. */
+struct _linked_list_s {
+    list_entry_t *head;
+    list_entry_t *tail;
+    /* last entry returned by pick_entry() and its index: used as a starting
+     * point to shorten later positional lookups */
+    list_entry_t *cur;
+    size_t pos;
+    /* number of entries currently linked */
+    size_t length;
+#ifdef THREAD_SAFE
+    /* initialized as a recursive mutex by list_init() */
+    pthread_mutex_t lock;
+#endif
+    /* optional callback used by list_clear() to release stored values */
+    free_value_callback_t free_value_cb;
+    /* NOTE(review): not referenced in the visible part of this file */
+    int refcnt;
+    /* list_destroy() calls slice_destroy() on slices->value until this is NULL */
+    list_entry_t * slices;
+};
+
+/* A (list, offset, length) triple; list_foreach_value() builds one spanning
+ * the whole list. Presumably a window over 'length' items starting at
+ * 'offset' -- confirm against the slice implementation. */
+struct _slice_s {
+    linked_list_t *list;
+    size_t offset;
+    size_t length;
+};
+
+/********************************************************************
+ * Entry-based API
+ * - Internal use only
+ ********************************************************************/
+
+/* Entry creation and destruction routines */
+static inline list_entry_t *create_entry();
+static inline void destroy_entry(list_entry_t *entry);
+
+/* List and list_entry_t manipulation routines */
+static inline list_entry_t *pop_entry(linked_list_t *list);
+static inline int push_entry(linked_list_t *list, list_entry_t 
*entry);
+static inline int unshift_entry(linked_list_t *list, list_entry_t *entry);
+static inline list_entry_t *shift_entry(linked_list_t *list);
+static inline int insert_entry(linked_list_t *list, list_entry_t *entry, size_t pos);
+static inline list_entry_t *pick_entry(linked_list_t *list, size_t pos);
+static inline list_entry_t *fetch_entry(linked_list_t *list, size_t pos);
+// list_entry_t *SelectEntry(linked_list_t *list, size_t pos);
+static inline list_entry_t *remove_entry(linked_list_t *list, size_t pos);
+static inline long get_entry_position(list_entry_t *entry);
+static inline int move_entry(linked_list_t *list, size_t srcPos, size_t dstPos);
+static inline list_entry_t *subst_entry(linked_list_t *list, size_t pos, list_entry_t *entry);
+static inline int swap_entries(linked_list_t *list, size_t pos1, size_t pos2);
+
+/*
+ * Create a new linked_list_t.
+ * Allocates a zero-initialized handle with PDC_calloc() and runs list_init()
+ * on it. Returns NULL if either the allocation or the initialization fails.
+ */
+linked_list_t *
+list_create()
+{
+    linked_list_t *list = (linked_list_t *)PDC_calloc(1, sizeof(linked_list_t));
+    if (list) {
+        if (list_init(list) != 0) {
+            free(list);
+            return NULL;
+        }
+    }
+    return list;
+}
+
+/*
+ * Initialize a preallocated linked_list_t pointed by list
+ * (useful when using static list handlers).
+ * When THREAD_SAFE is defined the list lock is created as a recursive mutex;
+ * otherwise nothing has to be done.
+ * Returns 0 on success, -1 if the mutex could not be set up.
+ */
+int
+list_init(linked_list_t *list __attribute__((unused)))
+{
+#ifdef THREAD_SAFE
+    pthread_mutexattr_t attr;
+    int rc;
+
+    if (pthread_mutexattr_init(&attr) != 0) {
+        return -1;
+    }
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+    rc = pthread_mutex_init(&list->lock, &attr);
+    /* the attribute object is not needed anymore, whatever the outcome:
+     * release it on the failure path as well */
+    pthread_mutexattr_destroy(&attr);
+    if (rc != 0) {
+        return -1;
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Destroy a linked_list_t. Free resources allocated for list:
+ * every slice still registered on the list is destroyed, the list is
+ * cleared, the lock is destroyed (THREAD_SAFE builds) and the handle freed.
+ * A NULL list is ignored.
+ */
+void
+list_destroy(linked_list_t *list)
+{
+    if (list) {
+        while (list->slices)
+            slice_destroy(list->slices->value);
+        list_clear(list);
+#ifdef THREAD_SAFE
+        MUTEX_DESTROY(list->lock);
+#endif
+        free(list);
+    }
+}
+
+/*
+ * Release a tagged value: the tag is always freed, then the value is
+ * released according to its kind:
+ *  - TV_TYPE_LIST : the nested list is destroyed with list_destroy()
+ *  - free_cb set  : the callback is invoked on the value
+ *  - vlen != 0    : the value is released with free()
+ *  - otherwise    : the value is left untouched
+ * A NULL tval is ignored.
+ */
+static void
+list_destroy_tagged_value_internal(tagged_value_t *tval, void (*free_cb)(void *v))
+{
+    if (tval) {
+        free(tval->tag);
+        if (tval->value) {
+            if (tval->type == TV_TYPE_LIST)
+                list_destroy((linked_list_t *)tval->value);
+            else if (free_cb)
+                free_cb(tval->value);
+            else if (tval->vlen)
+                free(tval->value);
+        }
+        free(tval);
+    }
+}
+
+/*
+ * Clear a linked_list_t. Removes (and frees) all entries in list.
+ * Values held by tagged entries are released together with their
+ * tagged_value_t; values held by plain entries are passed to the list's
+ * free_value_cb when one is set, otherwise they are left to the caller.
+ */
+void
+list_clear(linked_list_t *list)
+{
+    list_entry_t *e;
+    /* Destroy all entries still in list */
+    while ((e = shift_entry(list)) != NULL) {
+        /* if there is a tagged_value_t associated to the entry,
+         * let's free memory also for it */
+        if (e->tagged && e->value)
+            list_destroy_tagged_value_internal((tagged_value_t *)e->value, list->free_value_cb);
+        else if (list->free_value_cb)
+            list->free_value_cb(e->value);
+
+        destroy_entry(e);
+    }
+}
+
+/* Returns the current length of the linked_list_t pointed by l */
+size_t
+list_count(linked_list_t *l)
+{
+    size_t len;
+    MUTEX_LOCK(l->lock);
+    len = l->length;
+    MUTEX_UNLOCK(l->lock);
+    return len;
+}
+
+/* Set the callback list_clear() uses to release the stored values */
+void
+list_set_free_value_callback(linked_list_t *list, free_value_callback_t free_value_cb)
+{
+    MUTEX_LOCK(list->lock);
+    list->free_value_cb = free_value_cb;
+    MUTEX_UNLOCK(list->lock);
+}
+
+/* Acquire the list lock on behalf of the caller */
+void
+list_lock(linked_list_t *list __attribute__((unused)))
+{
+    MUTEX_LOCK(list->lock);
+}
+
+/* Release the list lock previously taken with list_lock() */
+void
+list_unlock(linked_list_t *list __attribute__((unused)))
+{
+    MUTEX_UNLOCK(list->lock);
+}
+
+/*
+ * Create a new list_entry_t structure. Allocates resources and returns
+ * a pointer to the just created (zero-initialized) list_entry_t structure,
+ * or whatever PDC_calloc() returned on failure.
+ */
+static inline list_entry_t *
+create_entry()
+{
+    list_entry_t *new_entry = (list_entry_t *)PDC_calloc(1, sizeof(list_entry_t));
+    /*
+    if (!new_entry) {
+        fprintf(stderr, "Can't create new entry: %s", strerror(errno));
+    }
+    */
+    return new_entry;
+}
+
+/*
+ * Free resources allocated for a list_entry_t structure
+ * If the entry is linked in a list this routine will also unlink correctly
+ * the entry from the list. The value held by the entry is not released.
+ */
+static inline void
+destroy_entry(list_entry_t *entry)
+{
+    long pos;
+    if (entry) {
+        if (entry->list) {
+            /* entry is linked in a list...let's remove that reference */
+            pos = get_entry_position(entry);
+            if (pos >= 0)
+                remove_entry(entry->list, pos);
+        }
+        free(entry);
+    }
+}
+
+/*
+ * Pops a list_entry_t from the end of the list (or bottom of the stack
+ * if you are using the list as a stack).
+ * Returns the unlinked entry, or NULL if the list is empty.
+ */
+static inline list_entry_t *
+pop_entry(linked_list_t *list)
+{
+    list_entry_t *entry;
+    MUTEX_LOCK(list->lock);
+
+    entry = list->tail;
+    if (entry) {
+        list->tail = entry->prev;
+        if (list->tail)
+            list->tail->next = NULL;
+        list->length--;
+
+        entry->list = NULL;
+        entry->prev = NULL;
+        entry->next = NULL;
+
+        /* the cached cursor must not point to an unlinked entry */
+        if (list->cur == entry)
+            list->cur = NULL;
+    }
+    if (list->length == 0)
+        list->head = list->tail = NULL;
+
+    MUTEX_UNLOCK(list->lock);
+    return entry;
+}
+
+/*
+ * Pushes a list_entry_t at the end of a list.
+ * Returns 0 on success, -1 if entry is NULL.
+ */
+static inline int
+push_entry(linked_list_t *list, list_entry_t *entry)
+{
+    list_entry_t *p;
+    if (!entry)
+        return -1;
+    MUTEX_LOCK(list->lock);
+    if (list->length == 0) {
+        list->head = list->tail = entry;
+    }
+    else {
+        p = list->tail;
+        p->next = entry;
+        entry->prev = p;
+        entry->next = NULL;
+        list->tail = entry;
+    }
+    list->length++;
+    entry->list = list;
+    MUTEX_UNLOCK(list->lock);
+    return 0;
+}
+
+/*
+ * Retrieve a list_entry_t from the beginning of a list (or top of the stack
+ * if you are using the list as a 
stack)
+ * Returns the unlinked entry, or NULL if the list is empty.
+ */
+static inline list_entry_t *
+shift_entry(linked_list_t *list)
+{
+    list_entry_t *entry;
+    MUTEX_LOCK(list->lock);
+    entry = list->head;
+    if (entry) {
+        list->head = entry->next;
+        if (list->head)
+            list->head->prev = NULL;
+        list->length--;
+
+        entry->list = NULL;
+        entry->prev = NULL;
+        entry->next = NULL;
+
+        /* keep the cached cursor consistent: drop it if it was the removed
+         * entry, otherwise its index moved one step towards the head */
+        if (list->cur == entry)
+            list->cur = NULL;
+        else if (list->pos)
+            list->pos--;
+    }
+    if (list->length == 0)
+        list->head = list->tail = NULL;
+    MUTEX_UNLOCK(list->lock);
+    return entry;
+}
+
+/*
+ * Insert a list_entry_t at the beginning of a list (or at the top of the stack).
+ * Returns 0 on success, -1 if entry is NULL.
+ */
+static inline int
+unshift_entry(linked_list_t *list, list_entry_t *entry)
+{
+    list_entry_t *p;
+    if (!entry)
+        return -1;
+    MUTEX_LOCK(list->lock);
+    if (list->length == 0) {
+        list->head = list->tail = entry;
+    }
+    else {
+        p = list->head;
+        p->prev = entry;
+        entry->prev = NULL;
+        entry->next = p;
+        list->head = entry;
+    }
+    list->length++;
+    entry->list = list;
+    /* the cached cursor (if any) is now one position further from the head */
+    if (list->cur)
+        list->pos++;
+    MUTEX_UNLOCK(list->lock);
+    return 0;
+}
+
+/*
+ * Insert an entry at a specified position in a linked_list_t.
+ *  - pos == 0            : the entry becomes the new head
+ *  - pos == list length  : the entry is appended
+ *  - pos >  list length  : the list is padded with empty entries up to pos,
+ *                          then the entry is appended
+ *  - otherwise           : the entry is linked right after the one at pos - 1
+ * Returns 0 on success, -1 if entry is NULL or on failure.
+ */
+static inline int
+insert_entry(linked_list_t *list, list_entry_t *entry, size_t pos)
+{
+    list_entry_t *prev, *next;
+    int ret = -1;
+
+    /* a NULL entry would be dereferenced by the middle-insert path below */
+    if (!entry)
+        return -1;
+
+    MUTEX_LOCK(list->lock);
+    if (pos == 0) {
+        ret = unshift_entry(list, entry);
+    }
+    else if (pos == list->length) {
+        ret = push_entry(list, entry);
+    }
+    else if (pos > list->length) {
+        size_t i;
+        for (i = list->length; i < pos; i++) {
+            list_entry_t *emptyEntry = create_entry();
+            if (!emptyEntry || push_entry(list, emptyEntry) != 0) {
+                if (emptyEntry)
+                    destroy_entry(emptyEntry);
+                MUTEX_UNLOCK(list->lock);
+                return -1;
+            }
+        }
+        ret = push_entry(list, entry);
+    }
+
+    if (ret == 0) {
+        MUTEX_UNLOCK(list->lock);
+        return ret;
+    }
+
+    /* 0 < pos < length : link the entry between pos - 1 and pos */
+    prev = pick_entry(list, pos - 1);
+    if (prev) {
+        next = prev->next;
+        prev->next = entry;
+        entry->prev = prev;
+        entry->next = next;
+        if (next)
+            next->prev = entry;
+        list->length++;
+        /* record the owner like push_entry()/unshift_entry() do, so that
+         * get_entry_position()/destroy_entry() can find the entry */
+        entry->list = list;
+        ret = 0;
+    }
+    MUTEX_UNLOCK(list->lock);
+    return ret;
+}
+
+/*
+ * Retrieve the list_entry_t at pos in a linked_list_t without removing it from the list.
+ * The walk starts from the cached cursor when it is closer to pos than half
+ * of the list, otherwise from the nearest end. On success the cursor is
+ * updated to the returned entry. Returns NULL if pos is out of range.
+ */
+static inline list_entry_t *
+pick_entry(linked_list_t *list, size_t pos)
+{
+    unsigned int i;
+    list_entry_t *entry;
+
+    MUTEX_LOCK(list->lock);
+
+    if (list->length <= pos) {
+        MUTEX_UNLOCK(list->lock);
+        return NULL;
+    }
+
+    size_t half_length = list->length >> 1;
+    /* we rely on integer underflow for the argument to abs(). */
+    if (list->cur && (size_t)abs((int)(list->pos - pos)) < half_length) {
+        entry = list->cur;
+        if (list->pos != pos) {
+            if (list->pos < pos) {
+                for (i = list->pos; i < pos; i++) {
+                    entry = entry->next;
+                }
+            }
+            else if (list->pos > pos) {
+                for (i = list->pos; i > pos; i--) {
+                    entry = entry->prev;
+                }
+            }
+        }
+    }
+    else {
+        if (pos > half_length) {
+            entry = list->tail;
+            for (i = list->length - 1; i > pos; i--) {
+                entry = entry->prev;
+            }
+        }
+        else {
+            entry = list->head;
+            for (i = 0; i < pos; i++) {
+                entry = entry->next;
+            }
+        }
+    }
+    if (entry) {
+        list->pos = pos;
+        list->cur = entry;
+    }
+
+    MUTEX_UNLOCK(list->lock);
+    return entry;
+}
+
+/* Retrieve the list_entry_t at pos in a linked_list_t removing it from the list
+ * XXX - no locking here because this routine is just an accessor to other routines
+ * Caller MUST destroy the returned entry through destroy_entry() call
+ */
+static inline list_entry_t *
+fetch_entry(linked_list_t *list, size_t pos)
+{
+    list_entry_t *entry = NULL;
+    if (pos == 0)
+        return shift_entry(list);
+    else if (pos == list_count(list) - 1)
+        return pop_entry(list);
+
+    entry = remove_entry(list, pos);
+    return entry;
+}
+
+/*
+ * Move the entry at srcPos to dstPos.
+ * Returns 0 on success; -1 if there is no entry at srcPos or if the
+ * insertion at dstPos failed (in that case the entry is put back at srcPos
+ * on a best-effort basis).
+ */
+static inline int
+move_entry(linked_list_t *list, size_t srcPos, size_t dstPos)
+{
+    list_entry_t *e;
+
+    e = fetch_entry(list, srcPos);
+    if (e) {
+        if (insert_entry(list, e, dstPos) == 0)
+            return 0;
+        else {
+            if (insert_entry(list, e, srcPos) != 0) {
+                // fprintf(stderr, "Can't restore entry at index %lu while moving to %lu\n", srcPos, dstPos);
+            }
+        }
+    }
+    /* TODO - Unimplemented */
+    return -1;
+}
+
+/*
+ * Swap the entries stored at pos1 and pos2.
+ * Returns 0 on success, -1 if the two positions are the same or if either
+ * of them is out of range (the list is left untouched in that case).
+ * XXX - the swap itself is still not atomic
+ */
+static inline int
+swap_entries(linked_list_t *list, size_t pos1, size_t pos2)
+{
+    list_entry_t *e1;
+    list_entry_t *e2;
+    size_t count = list_count(list);
+
+    /* an out-of-range position would make fetch_entry() return NULL and the
+     * following fetch/insert pair would then move an unrelated entry */
+    if (pos1 == pos2 || pos1 >= count || pos2 >= count)
+        return -1;
+
+    if (pos2 > pos1) {
+        e2 = fetch_entry(list, pos2);
+        insert_entry(list, e2, pos1);
+        e1 = fetch_entry(list, pos1 + 1);
+        insert_entry(list, e1, pos2);
+    }
+    else {
+        e1 = fetch_entry(list, pos1);
+        insert_entry(list, e1, pos2);
+        e2 = fetch_entry(list, pos2 + 1);
+        insert_entry(list, e2, pos1);
+    }
+
+    /* TODO - Unimplemented */
+    return 0;
+}
+
+/*
+ * Replace the entry at pos with the provided one.
+ * Returns the old entry at pos, or NULL if there was none.
+ */
+static inline list_entry_t *
+subst_entry(linked_list_t *list, size_t pos, list_entry_t *entry)
+{
+    list_entry_t *old;
+
+    MUTEX_LOCK(list->lock);
+
+    old = fetch_entry(list, pos);
+    if (!old) {
+        MUTEX_UNLOCK(list->lock);
+        return NULL;
+    }
+    insert_entry(list, entry, pos);
+
+    MUTEX_UNLOCK(list->lock);
+    /* XXX - NO CHECK ON INSERTION */
+    return old;
+}
+
+/*
+ * Unlink and return the entry at pos (NULL if pos is out of range).
+ * The cached cursor is dropped if it pointed to the removed entry and
+ * shifted if it was located after it.
+ * XXX - POSSIBLE RACE CONDITION BETWEEN pick_entry and the actual removal
+ */
+static inline list_entry_t *
+remove_entry(linked_list_t *list, size_t pos)
+{
+    list_entry_t *next, *prev;
+    list_entry_t *entry = pick_entry(list, pos);
+    MUTEX_LOCK(list->lock);
+    if (entry) {
+        prev = entry->prev;
+        next = entry->next;
+        if (pos == 0)
+            list->head = next;
+        else if (pos == list->length - 1)
+            list->tail = prev;
+
+        if (prev)
+            prev->next = next;
+        if (next)
+            next->prev = prev;
+
+        list->length--;
+        entry->list = NULL;
+        entry->prev = NULL;
+        entry->next = NULL;
+
+        if (list->cur == entry) {
+            list->cur = NULL;
+            list->pos = 0;
+        }
+        else if (list->pos > pos) {
+            list->pos--;
+        }
+        MUTEX_UNLOCK(list->lock);
+        return entry;
+    }
+    MUTEX_UNLOCK(list->lock);
+    return NULL;
+}
+
+/* return position of entry if linked in a list. 
+ * Scans entire list so it can be slow for very long lists.
+ * Returns -1 if the entry is not linked in any list. */
+static inline long
+get_entry_position(list_entry_t *entry)
+{
+    /* same type as the return value, so that long lists cannot overflow it */
+    long i = 0;
+    linked_list_t *list;
+    list_entry_t * p;
+    list = entry->list;
+
+    if (!list)
+        return -1;
+
+    MUTEX_LOCK(list->lock);
+    p = list->head;
+    while (p) {
+        if (p == entry) {
+            MUTEX_UNLOCK(list->lock);
+            return i;
+        }
+        p = p->next;
+        i++;
+    }
+    MUTEX_UNLOCK(list->lock);
+    return -1;
+}
+
+/* Remove the last entry of the list and return its value (NULL if the list is empty) */
+void *
+list_pop_value(linked_list_t *list)
+{
+    void * val = NULL;
+    list_entry_t *entry = pop_entry(list);
+    if (entry) {
+        val = entry->value;
+        destroy_entry(entry);
+    }
+    return val;
+}
+
+/* Append val to the list. Returns 0 on success, -1 otherwise */
+int
+list_push_value(linked_list_t *list, void *val)
+{
+    int res;
+    list_entry_t *new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+    new_entry->value = val;
+    res = push_entry(list, new_entry);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+/* Insert val at the beginning of the list. Returns 0 on success, -1 otherwise */
+int
+list_unshift_value(linked_list_t *list, void *val)
+{
+    int res;
+    list_entry_t *new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+    new_entry->value = val;
+    res = unshift_entry(list, new_entry);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+/* Remove the first entry of the list and return its value (NULL if the list is empty) */
+void *
+list_shift_value(linked_list_t *list)
+{
+    void * val = NULL;
+    list_entry_t *entry = shift_entry(list);
+    if (entry) {
+        val = entry->value;
+        destroy_entry(entry);
+    }
+    return val;
+}
+
+/* Insert val at position pos (see insert_entry()). Returns 0 on success, -1 otherwise */
+int
+list_insert_value(linked_list_t *list, void *val, size_t pos)
+{
+    int res;
+    list_entry_t *new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+    new_entry->value = val;
+    res = insert_entry(list, new_entry, pos);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+/* Return the value stored at pos without removing it (NULL if pos is out of range) */
+void *
+list_pick_value(linked_list_t *list, size_t pos)
+{
+    list_entry_t *entry = pick_entry(list, pos);
+    if (entry)
+        return entry->value;
+    return NULL;
+}
+
+/* Remove the entry at pos and return its value (NULL if pos is out of range) */
+void *
+list_fetch_value(linked_list_t *list, size_t pos)
+{
+    void * val = NULL;
+    list_entry_t *entry = fetch_entry(list, pos);
+    if (entry) {
+        val = entry->value;
+        destroy_entry(entry);
+    }
+    return val;
+}
+
+/* just an accessor to move_entry */
+int
+list_move_value(linked_list_t *list, size_t srcPos, size_t dstPos)
+{
+    return move_entry(list, srcPos, dstPos);
+}
+
+/*
+ * Store newval at pos.
+ * If an entry already exists at pos its value is replaced and the old value
+ * is returned; otherwise newval is inserted at pos and NULL is returned.
+ */
+void *
+list_set_value(linked_list_t *list, size_t pos, void *newval)
+{
+    void *old_value = NULL;
+    MUTEX_LOCK(list->lock);
+    list_entry_t *entry = pick_entry(list, pos);
+    if (entry) {
+        old_value = entry->value;
+        entry->value = newval;
+    }
+    else {
+        list_insert_value(list, newval, pos);
+    }
+    MUTEX_UNLOCK(list->lock);
+    return old_value;
+}
+
+/*
+ * Replace the value at pos with newval and return the old value.
+ * Unlike list_set_value() nothing is inserted (and NULL is returned)
+ * when there is no entry at pos.
+ */
+void *
+list_subst_value(linked_list_t *list, size_t pos, void *newval)
+{
+    void *old_value = NULL;
+    MUTEX_LOCK(list->lock);
+    list_entry_t *entry = pick_entry(list, pos);
+    if (entry) {
+        old_value = entry->value;
+        entry->value = newval;
+    }
+    MUTEX_UNLOCK(list->lock);
+    return old_value;
+}
+
+/* just an accessor to swap_entries */
+int
+list_swap_values(linked_list_t *list, size_t pos1, size_t pos2)
+{
+    return swap_entries(list, pos1, pos2);
+}
+
+/*
+ * Call item_handler through slice_foreach_value() on a slice covering the
+ * whole list. The length is sampled under the list lock, the iteration
+ * itself happens after the lock has been released.
+ */
+int
+list_foreach_value(linked_list_t *list, int (*item_handler)(void *item, size_t idx, void *user), void *user)
+{
+    MUTEX_LOCK(list->lock);
+    slice_t slice = {.list = list, .offset = 0, .length = list->length};
+    MUTEX_UNLOCK(list->lock);
+    return slice_foreach_value(&slice, item_handler, user);
+}
+
+/*
+ * Allocates a new tagged_value_t whose tag is a copy of 'tag' and whose
+ * value just points to 'val' (the value is not copied and no length is
+ * recorded).
+ * Returns NULL if the structure or the tag copy cannot be allocated.
+ */
+tagged_value_t *
+list_create_tagged_value_nocopy(char *tag, void *val)
+{
+    tagged_value_t *newval = (tagged_value_t *)PDC_calloc(1, sizeof(tagged_value_t));
+    if (!newval) {
+        // fprintf(stderr, "Can't create new tagged value: %s", strerror(errno));
+        return NULL;
+    }
+
+    if (tag) {
+        newval->tag = strdup(tag);
+        if (!newval->tag) {
+            /* don't hand out a tagged value which silently lost its tag */
+            free(newval);
+            return NULL;
+        }
+    }
+    if (val)
+        newval->value = val;
+
+    return newval;
+}
+
+/*
+ * Allocates resources for a new tagged_value_t initializing both tag and value
+ * to what received as argument.
+ * if vlen is 0, then val is assumed to be a NUL-terminated string and
+ * strdup is used to copy it.
+ * Return a pointer to the new allocated tagged_value_t. 
+ */
+tagged_value_t *
+list_create_tagged_value(char *tag, void *val, size_t vlen)
+{
+    tagged_value_t *newval = (tagged_value_t *)PDC_calloc(1, sizeof(tagged_value_t));
+    if (!newval) {
+        // fprintf(stderr, "Can't create new tagged value: %s", strerror(errno));
+        return NULL;
+    }
+
+    if (tag) {
+        newval->tag = strdup(tag);
+        if (!newval->tag) {
+            free(newval);
+            return NULL;
+        }
+    }
+    if (val) {
+        if (vlen) {
+            /* binary value: copy vlen bytes and append a terminating NUL */
+            newval->value = PDC_malloc(vlen + 1);
+            if (!newval->value) {
+                // fprintf(stderr, "Can't copy value: %s", strerror(errno));
+                free(newval->tag);
+                free(newval);
+                return NULL;
+            }
+            memcpy(newval->value, val, vlen);
+            ((char *)newval->value)[vlen] = '\0';
+            newval->vlen = vlen;
+            newval->type = TV_TYPE_BINARY;
+        }
+        else {
+            /* no length provided: val is handled as a C string */
+            newval->value = (void *)strdup((char *)val);
+            if (!newval->value) {
+                free(newval->tag);
+                free(newval);
+                return NULL;
+            }
+            newval->vlen = strlen((char *)val);
+            newval->type = TV_TYPE_STRING;
+        }
+    }
+    return newval;
+}
+
+/*
+ * Allocates resources for a new tagged_value_t
+ * containing a linked_list_t instead of a simple buffer.
+ * This let us define folded linked_list_t and therefore represent
+ * trees (or a sort of folded hashrefs).
+ * The sublist is referenced, not copied.
+ * Returns NULL if the structure or the tag copy cannot be allocated.
+ */
+tagged_value_t *
+list_create_tagged_sublist(char *tag, linked_list_t *sublist)
+{
+    tagged_value_t *newval = (tagged_value_t *)PDC_calloc(1, sizeof(tagged_value_t));
+    if (!newval) {
+        // fprintf(stderr, "Can't create new tagged value: %s", strerror(errno));
+        return NULL;
+    }
+
+    if (tag) {
+        newval->tag = strdup(tag);
+        if (!newval->tag) {
+            free(newval);
+            return NULL;
+        }
+    }
+    newval->type = TV_TYPE_LIST;
+    newval->value = sublist;
+    return newval;
+}
+
+/* Release resources for tagged_value_t pointed by tval */
+void
+list_destroy_tagged_value(tagged_value_t *tval)
+{
+    list_destroy_tagged_value_internal(tval, NULL);
+}
+
+/*
+ * Set a tagged value in the list.
+ * A new tagged value is built from tag/value (copying the value when 'copy'
+ * is non-zero). If the list already holds a tagged value with the same tag,
+ * the first occurrence is replaced in place and the previous tagged_value_t
+ * is returned (it is not released). Otherwise the new tagged value is
+ * appended to the list and NULL is returned.
+ * NULL is also returned, leaving the list untouched, if list or tag is NULL
+ * or if the new tagged value cannot be created.
+ */
+tagged_value_t *
+list_set_tagged_value(linked_list_t *list, char *tag, void *value, size_t len, int copy)
+{
+    size_t i;
+    tagged_value_t *tval;
+
+    /* the tag is compared with strcmp() below, so it must be a valid string */
+    if (!list || !tag)
+        return NULL;
+
+    if (copy)
+        tval = list_create_tagged_value(tag, value, len);
+    else
+        tval = list_create_tagged_value_nocopy(tag, value);
+    if (!tval)
+        return NULL;
+
+    MUTEX_LOCK(list->lock);
+    for (i = 0; i < list->length; i++) {
+        tagged_value_t *tv = list_pick_tagged_value(list, i);
+        if (tv && tv->tag && strcmp(tv->tag, tag) == 0) {
+            /* replace the value in place, preserving the position */
+            list_set_value(list, i, tval);
+            MUTEX_UNLOCK(list->lock);
+            return tv;
+        }
+    }
+    /* no match: append. The new tagged value is owned by the list on success
+     * and must be released only if it could not be stored */
+    if (list_push_tagged_value(list, tval) != 0)
+        list_destroy_tagged_value(tval);
+    MUTEX_UNLOCK(list->lock);
+    return NULL;
+}
+
+/* Pops a tagged_value_t from the list pointed by list */
+tagged_value_t *
+list_pop_tagged_value(linked_list_t *list)
+{
+    return (tagged_value_t *)list_pop_value(list);
+}
+
+/*
+ * Pushes a new tagged_value_t into list. user must give a valid tagged_value_t pointer
+ * created through a call to list_create_tagged_value() routine.
+ * Returns 0 on success, -1 if tval is NULL or the entry cannot be stored.
+ */
+int
+list_push_tagged_value(linked_list_t *list, tagged_value_t *tval)
+{
+    list_entry_t *new_entry;
+    int res;
+
+    if (!tval)
+        return -1;
+    new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+
+    new_entry->tagged = 1;
+    new_entry->value = tval;
+    res = push_entry(list, new_entry);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+/*
+ * Same as list_push_tagged_value() but the tagged value is stored at the
+ * beginning of the list.
+ * Returns 0 on success, -1 if tval is NULL or the entry cannot be stored.
+ */
+int
+list_unshift_tagged_value(linked_list_t *list, tagged_value_t *tval)
+{
+    list_entry_t *new_entry;
+    int res;
+
+    if (!tval)
+        return -1;
+    new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+
+    new_entry->tagged = 1;
+    new_entry->value = tval;
+    res = unshift_entry(list, new_entry);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+/*
+ * Removes and returns the tagged value stored at the head of the list.
+ * NOTE(review): linklist.h declares this routine as list_shift_tagged_value();
+ * confirm a definition with that name exists, otherwise callers using the
+ * declared name will fail to link.
+ */
+tagged_value_t *
+shift_tagged_value(linked_list_t *list)
+{
+    return (tagged_value_t *)list_shift_value(list);
+}
+
+/*
+ * Same as list_push_tagged_value() but the tagged value is stored at
+ * position pos (see insert_entry()).
+ * Returns 0 on success, -1 if tval is NULL or the entry cannot be stored.
+ */
+int
+list_insert_tagged_value(linked_list_t *list, tagged_value_t *tval, size_t pos)
+{
+    list_entry_t *new_entry;
+    int res;
+
+    if (!tval)
+        return -1;
+    new_entry = create_entry();
+    if (!new_entry)
+        return -1;
+
+    new_entry->tagged = 1;
+    new_entry->value = tval;
+    res = insert_entry(list, new_entry, pos);
+    if (res != 0)
+        destroy_entry(new_entry);
+    return res;
+}
+
+tagged_value_t *
+list_pick_tagged_value(linked_list_t *list, size_t pos) 
+{ + return (tagged_value_t *)list_pick_value(list, pos); +} + +tagged_value_t * +list_fetch_tagged_value(linked_list_t *list, size_t pos) +{ + return (tagged_value_t *)list_fetch_value(list, pos); +} + +/* + * ... without removing it from the list + */ +tagged_value_t * +list_get_tagged_value(linked_list_t *list, char *tag) +{ + int i; + tagged_value_t *tval; + for (i = 0; i < (int)list_count(list); i++) { + tval = list_pick_tagged_value(list, i); + if (!tval) { + continue; + } + if (strcmp(tval->tag, tag) == 0) + return tval; + } + return NULL; +} + +/* + * ... without removing it from the list + * USER MUST NOT FREE MEMORY FOR RETURNED VALUES + * User MUST create a new list, pass it as 'values' + * and destroy it when no more needed .... entries + * returned inside the 'values' list MUST not be freed, + * because they reference directly the real entries inside 'list'. + */ +size_t +list_get_tagged_values(linked_list_t *list, char *tag, linked_list_t *values) +{ + int i; + int ret; + tagged_value_t *tval; + ret = 0; + for (i = 0; i < (int)list_count(list); i++) { + tval = list_pick_tagged_value(list, i); + if (!tval) { + continue; + } + if (strcmp(tval->tag, tag) == 0) { + list_push_value(values, tval->value); + ret++; + } + } + return ret; +} + +static inline void +swap_entry_node_val(list_entry_t *p1, list_entry_t *p2) +{ + if (!p1 || !p2) + return; + + void *tmp = p1->value; + p1->value = p2->value; + p2->value = tmp; +} + +static inline void +list_quick_sort(list_entry_t *head, list_entry_t *tail, list_entry_t *pivot, int length, + list_comparator_callback_t comparator) +{ + if (!head || !tail || !pivot || length < 2 || !comparator) + return; + + if (length == 2) { + if (comparator(head->value, tail->value) < 0) + swap_entry_node_val(head, tail); + return; + } + + void * pvalue = pivot->value; + list_entry_t *p1 = head, *p2 = tail; + + for (;;) { + + while (p1 && p1 != pivot && comparator(p1->value, pvalue) > 0) + p1 = p1->next; + + while (p2 && p2 != pivot 
&& comparator(p2->value, pvalue) < 0) + p2 = p2->prev; + + if (p1 == p2 || !p1 || !p2) + break; + + if (p1 == pivot) { + // all the elements on the left of the pivot are smaller + // so we can't just swap values anymore + if (p2->prev) + p2->prev->next = p2->next; + if (p2->next) + p2->next->prev = p2->prev; + + if (pivot->prev) + pivot->prev->next = p2; + else if (pivot == pivot->list->head) + pivot->list->head = p2; + + if (p2 == pivot->list->tail) + pivot->list->tail = p2->prev; + + list_entry_t *tmp = p2->prev; + p2->prev = pivot->prev; + pivot->prev = p2; + if (p2->prev) + p2->prev->next = p2; + + p2->next = pivot; + if (p2->next == head) + head = p2; + if (p2 == tail) + tail = tmp; + p2 = tmp; + + if (p1 != pivot) + p1 = p1->next; + } + else if (p2 == pivot) { + // all the elements on the right of the pivot are bigger + // so we can't just swap values anymore + if (p1->prev) + p1->prev->next = p1->next; + if (p1->next) + p1->next->prev = p1->prev; + + if (pivot->next) + pivot->next->prev = p1; + else if (pivot == pivot->list->tail) + pivot->list->tail = p1; + + if (p1 == pivot->list->head) + pivot->list->head = p1->next; + + list_entry_t *tmp = p1->next; + p1->next = pivot->next; + pivot->next = p1; + if (p1->next) + p1->next->prev = p1; + + p1->prev = pivot; + if (p1->prev == tail) + tail = p1; + if (p1 == head) + head = tmp; + p1 = tmp; + + if (p2 != pivot) + p2 = p2->prev; + } + else { + swap_entry_node_val(p1, p2); + + if (p1 != pivot) + p1 = p1->next; + if (p2 != pivot) + p2 = p2->prev; + } + } + + // TODO - optimize the pivot selection on the sublists + // (it could be done while traversing the list + // earlier in this function) + int l1 = 0; + p1 = head; + while (p1 != pivot) { + p1 = p1->next; + l1++; + } + int l2 = length - (l1 + 1); + int i; + list_entry_t *pv1 = head, *pv2 = tail; + for (i = 0; pv1 && pv1->next && i < l1 / 2; ++i) + pv1 = pv1->next; + for (i = 0; pv2 && pv2->prev && i < l2 / 2; ++i) + pv2 = pv2->prev; + + // recursion here + if 
(l1 > 1 && pivot->prev && head != pivot->prev) + list_quick_sort(head, pivot->prev, pv1, l1, comparator); + if (l2 > 1 && pivot->next && tail != pivot->next) + list_quick_sort(pivot->next, tail, pv2, l2, comparator); +} + +void +list_sort(linked_list_t *list, list_comparator_callback_t comparator) +{ + MUTEX_LOCK(list->lock); + list_entry_t *pivot = pick_entry(list, (list->length / 2) - 1); + list_quick_sort(list->head, list->tail, pivot, list->length, comparator); + list->cur = NULL; + list->pos = 0; + MUTEX_UNLOCK(list->lock); +} + +// size_t +// get_mem_usage_by_all_linkedlist() +// { +// return mem_usage_by_all_linkedlist; +// } + +slice_t * +slice_create(linked_list_t *list, size_t offset, size_t length) +{ + slice_t *slice = PDC_calloc(1, sizeof(slice_t)); + slice->list = list; + slice->offset = offset; + slice->length = length; + list_entry_t *e = create_entry(); + e->value = slice; + list_entry_t *cur = list->slices; + if (!cur) { + list->slices = e; + } + else { + while (cur->next) + cur = cur->next; + cur->next = e; + e->prev = cur; + } + + return slice; +} + +void +slice_destroy(slice_t *slice) +{ + linked_list_t *list = slice->list; + list_entry_t * cur = list->slices; + list_entry_t * prev = NULL; + while (cur) { + if (cur->value == slice) { + if (prev) { + prev->next = cur->next; + cur->next->prev = prev; + } + else { + list->slices = cur->next; + } + destroy_entry(cur); + break; + } + prev = cur; + cur = cur->next; + } + free(slice); +} + +int +slice_foreach_value(slice_t *slice, int (*item_handler)(void *item, size_t idx, void *user), void *user) +{ + linked_list_t *list = slice->list; + MUTEX_LOCK(list->lock); + size_t idx = 0; + list_entry_t *e = pick_entry(list, slice->offset); + while (e && idx < slice->length) { + int rc = item_handler(e->value, idx++, user); + if (rc == 0) { + break; + } + else if (rc == -1 || rc == -2) { + list_entry_t *d = e; + e = e->next; + if (list->head == list->tail && list->tail == d) { + list->head = list->tail = 
NULL; + } + else if (d == list->head) { + list->head = d->next; + list->head->prev = NULL; + } + else if (d == list->tail) { + list->tail = d->prev; + list->tail->next = NULL; + } + else { + e->prev = d->prev; + e->prev->next = e; + } + d->list = NULL; + if (list->cur == d) + list->cur = NULL; + list->length--; + slice->length--; + // the callback got the value and will take care of releasing it + destroy_entry(d); + if (rc == -2) // -2 means : remove and stop the iteration + break; + // -1 instead means that we still want to remove the item + // but we also want to go ahead with the iteration + } + else { + e = e->next; + } + } + MUTEX_UNLOCK(list->lock); + return idx; +} + +// vim: tabstop=4 shiftwidth=4 expandtab: +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ diff --git a/src/commons/collections/libhl/rbtree.c b/src/commons/collections/libhl/rbtree.c new file mode 100644 index 000000000..b6a1f3fab --- /dev/null +++ b/src/commons/collections/libhl/rbtree.c @@ -0,0 +1,1064 @@ +#include +#include +#include +#include +#include +#include +#include "rbtree.h" +#include "pdc_malloc.h" +#include "comparators.h" + +#define IS_BLACK(_n) (!(_n) || (_n)->color == RBTREE_COLOR_BLACK) +#define IS_RED(_n) ((_n) && (_n)->color == RBTREE_COLOR_RED) + +#define PAINT_BLACK(_n) \ + { \ + if (_n) \ + (_n)->color = RBTREE_COLOR_BLACK; \ + } +#define PAINT_RED(_n) \ + { \ + if (_n) \ + (_n)->color = RBTREE_COLOR_RED; \ + } + +size_t mem_usage_by_all_rbtrees; + +typedef enum { + RBTREE_COLOR_RED = 0, + RBTREE_COLOR_BLACK, +} rbt_color_t; + +typedef struct _rbt_node_s { + rbt_color_t color; + void * key; + size_t klen; + void * value; + struct _rbt_node_s *left; + struct _rbt_node_s *right; + struct _rbt_node_s *parent; +} rbt_node_t; + +struct _rbt_s { + rbt_node_t * root; + uint64_t size; + pdc_c_var_type_t dtype; + libhl_cmp_callback_t cmp_keys_cb; + rbt_free_value_callback_t free_value_cb; + double time_for_rotate; +}; + +rbt_t * 
+rbt_create_by_dtype(pdc_c_var_type_t dtype, rbt_free_value_callback_t free_value_cb) +{ + rbt_t *rbt = calloc(1, sizeof(rbt_t)); + // INIT_PERF_INFO_FIELDS(rbt, rbt_t); + // rbt->time_for_rotate = 0; + // mem_usage_by_all_rbtrees += sizeof(rbt_t); + + if (!rbt) + return NULL; + rbt->dtype = dtype; + rbt->free_value_cb = free_value_cb; + rbt->cmp_keys_cb = LIBHL_CMP_CB(dtype); + return rbt; +} + +rbt_t * +rbt_create(libhl_cmp_callback_t cmp_keys_cb, rbt_free_value_callback_t free_value_cb) +{ + rbt_t *rbt = calloc(1, sizeof(rbt_t)); + // INIT_PERF_INFO_FIELDS(rbt, rbt_t); + // rbt->time_for_rotate = 0; + // mem_usage_by_all_rbtrees += sizeof(rbt_t); + + if (!rbt) + return NULL; + rbt->free_value_cb = free_value_cb; + rbt->cmp_keys_cb = cmp_keys_cb; + rbt->dtype = PDC_UNKNOWN; // if created via this function, we must set the dtype later. + return rbt; +} + +void +rbt_set_dtype(rbt_t *rbt, pdc_c_var_type_t dtype) +{ + rbt->dtype = dtype; +} + +pdc_c_var_type_t +rbt_get_dtype(rbt_t *rbt) +{ + return rbt->dtype; +} + +static inline void +rbt_destroy_internal(rbt_node_t *node, rbt_free_value_callback_t free_value_cb) +{ + if (!node) + return; + + rbt_destroy_internal(node->left, free_value_cb); + node->left = NULL; + rbt_destroy_internal(node->right, free_value_cb); + node->right = NULL; + + if (node->key != NULL) { + PDC_free(node->key); + node->key = NULL; + if (free_value_cb) { + free_value_cb(node->value); + node->value = NULL; + } + } + + if (node->key == NULL) { + free(node); + } +} + +void +rbt_destroy(rbt_t *rbt) +{ + if (rbt == NULL) { + return; + } + // if (rbt->size == 0) { + // goto done; + // } + rbt_destroy_internal(rbt->root, rbt->free_value_cb); + // done: + rbt->root = NULL; + free(rbt); +} + +static int +rbt_range_walk_internal(rbt_t *rbt, rbt_node_t *node, void *begin_key, size_t bgk_size, void *end_key, + size_t edk_size, int sorted, rbt_walk_callback cb, void *priv, int beginInclusive, + int endInclusive) +{ + if (!node) + return 0; + + if 
(begin_key != NULL && end_key != NULL && + rbt->cmp_keys_cb(begin_key, bgk_size, end_key, edk_size) >= 0) { + return 0; + } + + int rc = 1; + int cbrc = 1; + + if (sorted && node->left) { + int rrc = rbt_range_walk_internal(rbt, node->left, begin_key, bgk_size, end_key, edk_size, sorted, cb, + priv, beginInclusive, endInclusive); + if (rrc == 0) + return rc + 1; + rc += rrc; + } + + int cmp_begin = begin_key != NULL ? rbt->cmp_keys_cb(node->key, node->klen, begin_key, bgk_size) : 1; + int cmp_end = end_key != NULL ? rbt->cmp_keys_cb(node->key, node->klen, end_key, edk_size) : -1; + + // current node = begin : no need for left child, collect + // current node < begin : no need for left child + // current node > begin : go for left child, if current node < end, collect + // current node = end : no need for right child, do not collect + // current node < end : go for the right child, if current node >= begin, collect + // current node > end : no need for right child + + // Decide to go left or right based on comparisons and include flags + int go_for_left = cmp_begin > 0; // || (beginInclusive && cmp_begin == 0); + int go_for_right = cmp_end < 0; // || (endInclusive && cmp_end == 0); + + // If the current node is within the range, or matches begin/end based on include flags, call the callback + if ((cmp_begin > 0 && cmp_end < 0) || (beginInclusive && cmp_begin == 0) || + (endInclusive && cmp_end == 0)) { + cbrc = cb(rbt, node->key, node->klen, node->value, priv); + } + + // int go_for_left = 1; + // int go_for_right = 1; + // if (cmp_begin == 0 && beginInclusive) { + // cbrc = cb(rbt, node->key, node->klen, node->value, priv); + // go_for_right = 1; + // } + // else if (cmp_begin > 0 && (cmp_end < 0 || (endInclusive && cmp_end == 0))) { + // cbrc = cb(rbt, node->key, node->klen, node->value, priv); + // go_for_right = 1; + // } + // else if (cmp_begin < 0) { + // go_for_left = 1; + // } + + // if (rbt->cmp_keys_cb(node->key, node->klen, begin_key, bgk_size) == 0) { + 
/*
 * Recursive worker behind rbt_walk() / rbt_walk_sorted().
 *
 * Visits the subtree rooted at 'node', invoking 'cb' on each visited node with
 * the node's key, key length, value and the opaque 'priv' pointer.
 * When 'sorted' is non-zero the left subtree is walked before the callback is
 * invoked for 'node'; otherwise the callback runs first and the left subtree
 * is walked afterwards. The right subtree is always walked last.
 *
 * Returns 0 when 'node' is NULL or when the callback returned RBT_WALK_STOP or
 * RBT_WALK_DELETE_AND_STOP for 'node'; otherwise a counter that starts at 1
 * for this node and accumulates the values returned by the child walks.
 *
 * NOTE(review): a 0 coming back from a child makes this level return rc + 1
 * (non-zero), so the caller one level further up does not see the stop
 * request and appears to carry on with its own callback / right subtree.
 * Confirm whether stopping is meant to unwind the whole walk.
 */
static int
rbt_walk_internal(rbt_t *rbt, rbt_node_t *node, int sorted, rbt_walk_callback cb, void *priv)
{
    if (!node)
        return 0;

    int rc   = 1;
    int cbrc = 0;

    // in-order mode: smaller keys (left subtree) are reported before this node
    if (sorted && node->left) {
        int rrc = rbt_walk_internal(rbt, node->left, sorted, cb, priv);
        if (rrc == 0)
            return rc + 1;
        rc += rrc;
    }

    cbrc = cb(rbt, node->key, node->klen, node->value, priv);
    switch (cbrc) {
        case RBT_WALK_DELETE_AND_STOP:
            rbt_remove(rbt, node->key, node->klen, NULL);
            return 0;
        case RBT_WALK_DELETE_AND_CONTINUE: {
            if (node->left && node->right) {
                // 'node' is walked again right after the removal, so this relies on
                // rbt_remove() keeping the node object alive in the two-children case
                // (presumably by moving a neighbour's key/value into it) — TODO confirm
                rbt_remove(rbt, node->key, node->klen, NULL);
                return rbt_walk_internal(rbt, node, sorted, cb, priv);
            }
            else if (node->left || node->right) {
                // NOTE(review): no rbt_remove() call on this path (nor on the leaf path
                // below) although the callback asked for a deletion — confirm intent
                return rbt_walk_internal(rbt, node->left ? node->left : node->right, sorted, cb, priv);
            }
            // this node was a leaf
            return 1;
        }
        case RBT_WALK_STOP:
            return 0;
        case RBT_WALK_CONTINUE:
            break;
        default:
            // TODO - Error Messages
            break;
    }

    // pre-order mode: the left subtree is visited after the callback
    if (!sorted && node->left) {
        int rrc = rbt_walk_internal(rbt, node->left, sorted, cb, priv);
        if (rrc == 0)
            return rc + 1;
        rc += rrc;
    }

    if (node->right) {
        int rrc = rbt_walk_internal(rbt, node->right, sorted, cb, priv);
        if (rrc == 0)
            return rc + 1;
        rc += rrc;
    }

    return rc;
}
bgk_size, end_key, edk_size, 1, cb, priv, + beginInclusive, endInclusive); + + // rbt->num_of_comparisons += rst; + return rst; +} + +int +rbt_range_lt(rbt_t *rbt, void *end_key, size_t edk_size, rbt_walk_callback cb, void *priv, int end_inclusive) +{ + int rst = 0; + if (rbt->root) + rst = rbt_range_walk_internal(rbt, rbt->root, NULL, 0, end_key, edk_size, 0, cb, priv, 1, + end_inclusive); + + // rbt->num_of_comparisons += rst; + return rst; +} + +int +rbt_range_gt(rbt_t *rbt, void *begin_key, size_t bgk_size, rbt_walk_callback cb, void *priv, + int begin_inclusive) +{ + int rst = 0; + if (rbt->root) + rst = rbt_range_walk_internal(rbt, rbt->root, begin_key, bgk_size, NULL, 0, 0, cb, priv, + begin_inclusive, 1); + // rbt->num_of_comparisons += rst; + return rst; +} + +int +rbt_range_lt_sorted(rbt_t *rbt, void *end_key, size_t edk_size, rbt_walk_callback cb, void *priv, + int end_inclusive) +{ + int rst = 0; + if (rbt->root) + rst = rbt_range_walk_internal(rbt, rbt->root, NULL, 0, end_key, edk_size, 1, cb, priv, 1, + end_inclusive); + // rbt->num_of_comparisons += rst; + return rst; +} + +int +rbt_range_gt_sorted(rbt_t *rbt, void *begin_key, size_t bgk_size, rbt_walk_callback cb, void *priv, + int begin_inclusive) +{ + int rst = 0; + if (rbt->root) + rst = rbt_range_walk_internal(rbt, rbt->root, begin_key, bgk_size, NULL, 0, 1, cb, priv, + begin_inclusive, 1); + // rbt->num_of_comparisons += rst; + return rst; +} + +static inline rbt_node_t * +rbt_grandparent(rbt_node_t *node) +{ + if (node && node->parent) + return node->parent->parent; + return NULL; +} + +static inline rbt_node_t * +rbt_uncle(rbt_node_t *node) +{ + rbt_node_t *gp = rbt_grandparent(node); + if (!gp) + return NULL; + if (node->parent == gp->left) + return gp->right; + else + return gp->left; +} + +static inline int +rbt_compare_keys(rbt_t *rbt, void *k1, size_t k1size, void *k2, size_t k2size) +{ + // stopwatch_t t_locate; + // timer_start(&t_locate); + int rc; + if (rbt->cmp_keys_cb) { + rc = 
rbt->cmp_keys_cb(k1, k1size, k2, k2size); + } + else { + if (k1size != k2size) { + if (k2size > k1size) { + rc = memcmp(k1, k2, k1size) - (k2size - k1size); + } + else { + rc = memcmp(k1, k2, k2size) + (k1size - k2size); + } + } + else { + rc = memcmp(k1, k2, k1size); + } + } + // timer_pause(&t_locate); + // rbt->time_to_locate += timer_delta_ns(&t_locate); + // rbt->num_of_comparisons++; + return rc; +} + +static int +rbt_add_internal(rbt_t *rbt, rbt_node_t *cur_node, rbt_node_t *new_node) +{ + int rc = rbt_compare_keys(rbt, cur_node->key, cur_node->klen, new_node->key, new_node->klen); + + if (rc == 0) { + // key matches, just set the new value + new_node->parent = cur_node->parent; + new_node->color = cur_node->color; + if (new_node->parent) { + if (new_node->parent->left == cur_node) + new_node->parent->left = new_node; + else + new_node->parent->right = new_node; + } + new_node->left = cur_node->left; + new_node->right = cur_node->right; + + if (new_node->left) + new_node->left->parent = new_node; + + if (new_node->right) + new_node->right->parent = new_node; + + if (new_node->value != cur_node->value && rbt->free_value_cb) + rbt->free_value_cb(cur_node->value); + + free(cur_node->key); + free(cur_node); + return 1; + } + else if (rc > 0) { + if (cur_node->left) { + return rbt_add_internal(rbt, cur_node->left, new_node); + } + else { + cur_node->left = new_node; + new_node->parent = cur_node; + } + } + else { + if (cur_node->right) { + return rbt_add_internal(rbt, cur_node->right, new_node); + } + else { + cur_node->right = new_node; + new_node->parent = cur_node; + } + } + return 0; +} + +static inline void +rbt_rotate_right(rbt_t *rbt, rbt_node_t *node) +{ + rbt_node_t *p = node->left; + node->left = p ? 
p->right : NULL; + if (p) + p->right = node; + + if (node->left) + node->left->parent = node; + + rbt_node_t *parent = node->parent; + node->parent = p; + if (p) { + p->parent = parent; + if (p->parent == NULL) { + rbt->root = p; + } + else { + if (parent->left == node) + parent->left = p; + else + parent->right = p; + } + } + else { + rbt->root = node; + } +} + +static inline void +rbt_rotate_left(rbt_t *rbt, rbt_node_t *node) +{ + rbt_node_t *p = node->right; + node->right = p ? p->left : NULL; + if (p) + p->left = node; + + if (node->right) + node->right->parent = node; + + rbt_node_t *parent = node->parent; + node->parent = p; + if (p) { + p->parent = parent; + if (p->parent == NULL) { + rbt->root = p; + } + else { + if (parent->left == node) + parent->left = p; + else + parent->right = p; + } + } + else { + rbt->root = node; + } +} + +int +rbt_add(rbt_t *rbt, void *k, size_t klen, void *v) +{ + int rc = 0; + // stopwatch_t t_expand; + // timer_start(&t_expand); + + rbt_node_t *node = PDC_calloc(1, sizeof(rbt_node_t)); + mem_usage_by_all_rbtrees += sizeof(rbt_node_t); + if (!node) + return -1; + + // rbt->num_of_reallocs++; + node->key = PDC_malloc(klen); + mem_usage_by_all_rbtrees += klen; + if (!node->key) { + free(node); + return -1; + } + memcpy(node->key, k, klen); + node->klen = klen; + node->value = v; + + // timer_pause(&t_expand); + // rbt->time_for_expansion += timer_delta_ns(&t_expand); + // rbt->num_of_comparisons++; + if (!rbt->root) { + PAINT_BLACK(node); + rbt->root = node; + } + else { + rc = rbt_add_internal(rbt, rbt->root, node); + // rbt->num_of_comparisons++; + if (IS_BLACK(node)) { + // if the node just added is now black it means + // it was already existing and this was only a value update + if (!node->parent) { + // we need to check also if the root pointer + // should be updated as well + rbt->root = node; + } + return 1; + } + // rbt->num_of_comparisons++; + if (!node->parent) { + // case 1 + PAINT_BLACK(node); + rbt->root = node; + } 
+ else if (IS_BLACK(node->parent)) { + // case 2 + return rc; + } + else { + // case 3 + rbt_node_t *uncle = rbt_uncle(node); + rbt_node_t *grandparent = rbt_grandparent(node); + // rbt->num_of_comparisons++; + if (IS_RED(uncle)) { + PAINT_BLACK(node->parent); + PAINT_BLACK(uncle); + if (grandparent) { + PAINT_RED(grandparent); + rbt_add(rbt, grandparent->key, grandparent->klen, grandparent->value); + } + } + else if (grandparent) { + // case 4 + // rbt->num_of_comparisons++; + if (node == node->parent->right && node->parent == grandparent->left) { + rbt_rotate_left(rbt, node->parent); + node = node->left; + } + else if (node == node->parent->left && node->parent == grandparent->right) { + rbt_rotate_right(rbt, node->parent); + node = node->right; + } + // case 5 + grandparent = rbt_grandparent(node); + if (node->parent) { + PAINT_BLACK(node->parent); + PAINT_RED(grandparent); + // rbt->num_of_comparisons++; + if (node == node->parent->left) + rbt_rotate_right(rbt, grandparent); + else + rbt_rotate_left(rbt, grandparent); + } + else { + fprintf(stderr, "Corrupted tree\n"); + return -1; + } + } + } + } + if (rc == 0) { + rbt->size = rbt->size + 1; + } + return rc; +} + +static rbt_node_t ** +rbt_find_internal(rbt_t *rbt, rbt_node_t **node, void *key, size_t klen) +{ + if (node == NULL || !(*node)) + return NULL; + + if ((*node)->key == NULL) { + return NULL; + } + + int rc = rbt_compare_keys(rbt, (*node)->key, (*node)->klen, key, klen); + + if (rc == 0) { + return node; + } + else if (rc > 0) { + return rbt_find_internal(rbt, &((*node)->left), key, klen); + } + else { + return rbt_find_internal(rbt, &((*node)->right), key, klen); + } + + return NULL; +} + +int +rbt_find(rbt_t *rbt, void *k, size_t klen, void **v) +{ + rbt_node_t **node = rbt_find_internal(rbt, &(rbt->root), k, klen); + if (node == NULL || !(*node)) + return -1; + + *v = (*node)->value; + return 0; +} + +static inline rbt_node_t * +rbt_sibling(rbt_node_t *node) +{ + return (node == 
node->parent->left) ? node->parent->right : node->parent->left; +} + +static inline rbt_node_t ** +rbt_find_next(rbt_node_t *node) +{ + if (!node->right) + return NULL; + + rbt_node_t **next = &(node->right); + + while ((*next)->left) + next = &((*next)->left); + + return next; +} + +static inline rbt_node_t ** +rbt_find_prev(rbt_node_t *node) +{ + if (!node->left) + return NULL; + + rbt_node_t **prev = &(node->left); + + while ((*prev)->right) + prev = &((*prev)->right); + + return prev; +} + +static void +rbt_paint_onremove(rbt_t *rbt, rbt_node_t *node) +{ + if (!node) + return; + + // delete case 1 + if (node->parent != NULL) { + // delete case 2 + rbt_node_t *sibling = rbt_sibling(node); + if (IS_RED(sibling)) { + PAINT_RED(node->parent); + PAINT_BLACK(sibling); + if (node == node->parent->left) { + rbt_rotate_left(rbt, node->parent); + } + else { + rbt_rotate_right(rbt, node->parent); + } + } + + // delete case 3 + if (IS_BLACK(node->parent) && sibling && IS_BLACK(sibling) && IS_BLACK(sibling->left) && + IS_BLACK(sibling->right)) { + PAINT_RED(sibling); + rbt_paint_onremove(rbt, node->parent); + } + else { + // delete case 4 + if (IS_RED(node->parent) && sibling && IS_BLACK(sibling) && IS_BLACK(sibling->left) && + IS_BLACK(sibling->right)) { + PAINT_RED(sibling); + PAINT_BLACK(node->parent); + } + else { + // delete case 5 + if (IS_BLACK(sibling)) { + if (node == node->parent->left && sibling && IS_BLACK(sibling->right) && + IS_RED(sibling->left)) { + PAINT_RED(sibling); + PAINT_BLACK(sibling->left); + rbt_rotate_right(rbt, sibling); + } + else if (node == node->parent->right && sibling && IS_BLACK(sibling->left) && + IS_RED(sibling->right)) { + PAINT_RED(sibling); + PAINT_BLACK(sibling->right); + rbt_rotate_left(rbt, sibling); + } + } + // delete case 6 + if (sibling) + sibling->color = node->parent->color; + PAINT_BLACK(node->parent); + if (node == node->parent->left) { + if (sibling) + PAINT_BLACK(sibling->right); + rbt_rotate_left(rbt, node->parent); + } + 
else { + if (sibling) + PAINT_BLACK(sibling->left); + rbt_rotate_right(rbt, node->parent); + } + } + } + } +} + +void +rbt_free_node(rbt_t *rbt, rbt_node_t **node_ptr, void **node_v, void **rtn_v) +{ + free((*node_ptr)->key); + (*node_ptr)->key = NULL; + if (rtn_v) + *rtn_v = *node_v; + else if (rbt->free_value_cb) { + rbt->free_value_cb(*node_v); + *node_v = NULL; + } + free(*node_ptr); + *node_ptr = NULL; +} + +int +rbt_remove(rbt_t *rbt, void *k, size_t klen, void **v) +{ + rbt_node_t **node = rbt_find_internal(rbt, &(rbt->root), k, klen); + if (node == NULL || !(*node)) + return -1; + + if ((*node)->left || (*node)->right) { + // the node is not a leaf + // now check if it has two children or just one + if ((*node)->left && (*node)->right) { + // two children case + rbt_node_t **n = NULL; + static int prevnext = 0; + int isprev = (prevnext++ % 2 == 0); + if (isprev) + n = rbt_find_prev((*node)); + else + n = rbt_find_next((*node)); + void *new_key = PDC_realloc((*node)->key, (*n)->klen); + mem_usage_by_all_rbtrees += (*n)->klen; + if (!new_key) + return -1; + (*node)->key = new_key; + memcpy((*node)->key, (*n)->key, (*n)->klen); + void *prev_value = (*node)->value; + (*node)->value = (*n)->value; + if (isprev) { + if (*n == (*node)->left) { + (*node)->left = (*n)->left; + } + else { + (*n)->parent->right = (*n)->left; + } + if (n && *n && (*n)->left) { + (*n)->left->parent = *node; + } + } + else { + if ((*n) == (*node)->right) { + (*node)->right = (*n)->right; + } + else { + (*n)->parent->left = (*n)->right; + } + if (n && *n && (*n)->right) { + (*n)->right->parent = *node; + } + } + + if (n && *n) { + rbt_free_node(rbt, n, &prev_value, v); + } + + // free((*n)->key); + // (*n)->key = NULL; + // if (v) + // *v = prev_value; + // else if (rbt->free_value_cb) { + // rbt->free_value_cb(prev_value); + // (&prev_value)[0] = NULL; + // } + + // free((*n)); + // *n = NULL; + rbt->size = rbt->size - 1; + return 0; + } + else { + // one child case + rbt_node_t *child = 
(*node)->right ? (*node)->right : (*node)->left; + // replace node with child + child->parent = (*node)->parent; + if (child->parent) { + if ((*node) == (*node)->parent->left) + (*node)->parent->left = child; + else + (*node)->parent->right = child; + } + if (IS_BLACK((*node))) { + if (IS_RED(child)) { + PAINT_BLACK(child); + } + else { + rbt_paint_onremove(rbt, child); + } + } + + if (node && *node) { + rbt_free_node(rbt, node, &((*node)->value), v); + } + + // if (v) + // *v = (*node)->value; + // else if (rbt->free_value_cb) { + // rbt->free_value_cb((*node)->value); + // (*node)->value = NULL; + // } + + // free((*node)->key); + // (*node)->key = NULL; + // free((*node)); + // *node = NULL; + rbt->size = rbt->size - 1; + return 0; + } + } + + // if it's not the root node we need to update the parent + if ((*node)->parent) { + if (*node == (*node)->parent->left) + (*node)->parent->left = NULL; + else + (*node)->parent->right = NULL; + } + + if (node && *node) { + rbt_free_node(rbt, node, &((*node)->value), v); + } + // if (v) + // *v = (*node)->value; + // else if (rbt->free_value_cb && (*node)->value) { + // rbt->free_value_cb((*node)->value); + // (*node)->value = NULL; + // } + + // free(node->key); + // node->key = NULL; + // // free(node); + // // (&node)[0] = NULL; + rbt->size = rbt->size - 1; + return 0; +} + +uint64_t +rbt_size(rbt_t *rbt) +{ + if (rbt == NULL) { + return 0; + } + return rbt->size; +} + +#ifdef DEBUG_RBTREE +static int +rbt_print_internal(rbt_node_t *node, int is_left, int offset, int depth, char s[20][255]) +{ + char b[20]; + memset(b, 0, sizeof(b)); + + if (!node) + return 0; + + sprintf(b, "(%d %C)", *((int *)node->value), node->color ? 
'B' : 'R'); + int width = strlen(b); + + int left = rbt_print_internal(node->left, 1, offset, depth + 1, s); + int right = rbt_print_internal(node->right, 0, offset + left + width, depth + 1, s); + + int i; + +#ifdef DEBUG_RBTREE_COMPACT + for (i = 0; i < width; i++) + s[depth][offset + left + i] = b[i]; + + if (depth && is_left) { + + for (i = 0; i < width + right; i++) + s[depth - 1][offset + left + width / 2 + i] = '-'; + + s[depth - 1][offset + left + width / 2] = '.'; + } + else if (depth && !is_left) { + + for (i = 0; i < left + width; i++) + s[depth - 1][offset - width / 2 + i] = '-'; + + s[depth - 1][offset + left + width / 2] = '.'; + } +#else + for (i = 0; i < width; i++) + s[2 * depth][offset + left + i] = b[i]; + + if (depth && is_left) { + + for (i = 0; i < width + right; i++) + s[2 * depth - 1][offset + left + width / 2 + i] = '-'; + + s[2 * depth - 1][offset + left + width / 2] = '+'; + s[2 * depth - 1][offset + left + width + right + width / 2] = '+'; + } + else if (depth && !is_left) { + + for (i = 0; i < left + width; i++) + s[2 * depth - 1][offset - width / 2 + i] = '-'; + + s[2 * depth - 1][offset + left + width / 2] = '+'; + s[2 * depth - 1][offset - width / 2 - 1] = '+'; + } +#endif + + return left + width + right; +} + +void +rbt_print(rbt_t *rbt) +{ + int i; + char s[20][255]; + memset(s, 0, sizeof(s)); + + struct winsize w; + ioctl(0, TIOCGWINSZ, &w); + + char format[16]; + snprintf(format, sizeof(format), "%%%ds", w.ws_col); + for (i = 0; i < 20; i++) + sprintf(s[i], format, " "); + + rbt_print_internal(rbt->root, 0, 0, 0, s); + + for (i = 0; i < 20; i++) + printf("%s\n", s[i]); +} +#endif + +// perf_info_t * +// get_perf_info_rbtree(rbt_t *index_root) +// { +// GET_PERF_INFO(index_root); +// } + +// void +// reset_perf_info_counters_rbtree(rbt_t *rbt) +// { +// RESET_PERF_INFO_COUNTERS(rbt); +// } + +// size_t +// get_mem_usage_by_all_rbtrees() +// { +// return mem_usage_by_all_rbtrees; +// } + +// vim: tabstop=4 shiftwidth=4 expandtab: 
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ diff --git a/src/commons/collections/pdc_set_test.c b/src/commons/collections/pdc_set_test.c new file mode 100644 index 000000000..6798d7385 --- /dev/null +++ b/src/commons/collections/pdc_set_test.c @@ -0,0 +1,66 @@ +#include "pdc_set.h" +#include "pdc_compare.h" +#include "pdc_hash.h" +#include +#include +#include +#include +#include + +void +set_value_free(SetValue value) +{ + free((void *)value); +} + +int +main(int argc, char **argv) +{ + // read the max id from the command line + char * endptr; + uint64_t max_id = strtoull(argv[1], &endptr, 10); + if (*endptr != '\0') { + fprintf(stderr, "Invalid number: %s\n", argv[1]); + return 1; + } + + Set *set = set_new(ui64_hash, ui64_equal); + set_register_free_function(set, set_value_free); + + for (uint64_t i = 0; i < max_id; i++) { + uint64_t *value = malloc(sizeof(uint64_t)); + *value = i; + set_insert(set, value); + } + + // test if the size of set is correct + + if (set_num_entries(set) != (unsigned int)max_id) { + printf("Error: set size is not correct\n"); + return 1; + } + + // retrieve all values from the set + for (uint64_t i = 0; i < max_id; i++) { + uint64_t *value = malloc(sizeof(uint64_t)); + *value = i; + if (!set_query(set, value)) { + printf("Error: value %" PRIu64 " not found in the set\n", i); + return 1; + } + } + + // iterate through all values + SetIterator *it; + set_iterate(set, it); + while (set_iter_has_more(it)) { + uint64_t *value = set_iter_next(it); + if (!set_query(set, value)) { + printf("Error: value %" PRIu64 " not found in the set\n", *value); + return 1; + } + } + + set_free(set); + return 0; +} \ No newline at end of file diff --git a/src/commons/file/bin_file_ops.c b/src/commons/file/bin_file_ops.c new file mode 100644 index 000000000..fcc695aac --- /dev/null +++ b/src/commons/file/bin_file_ops.c @@ -0,0 +1,225 @@ +#include "bin_file_ops.h" + +// type 1 int, 2 double, 3 string, 4 uint64, 5 size_t + 
+void +bin_append_int(int data, FILE *stream) +{ + int type = 1; + size_t length = 1; + fwrite(&type, sizeof(int), 1, stream); + fwrite(&length, sizeof(size_t), 1, stream); + fwrite(&data, sizeof(int), length, stream); +} + +void +bin_append_double(double data, FILE *stream) +{ + int type = 2; + size_t length = 1; + fwrite(&type, sizeof(int), 1, stream); + fwrite(&length, sizeof(size_t), 1, stream); + fwrite(&data, sizeof(double), length, stream); +} + +void +bin_append_string(char *data, FILE *stream) +{ + size_t length = strlen(data); + bin_append_string_with_len(data, length, stream); +} + +void +bin_append_string_with_len(char *data, size_t len, FILE *stream) +{ + int type = 3; + fwrite(&type, sizeof(int), 1, stream); + fwrite(&len, sizeof(size_t), 1, stream); + fwrite(data, sizeof(char), len, stream); +} + +void +bin_append_uint64(uint64_t data, FILE *stream) +{ + int type = 4; + size_t length = 1; + fwrite(&type, sizeof(int), 1, stream); + fwrite(&length, sizeof(size_t), 1, stream); + fwrite(&data, sizeof(uint64_t), length, stream); +} + +void +bin_append_size_t(size_t data, FILE *stream) +{ + int type = 5; + size_t length = 1; + fwrite(&type, sizeof(int), 1, stream); + fwrite(&length, sizeof(size_t), 1, stream); + fwrite(&data, sizeof(size_t), length, stream); +} + +void +bin_read_general(int *t, size_t *len, void **data, FILE *stream) +{ + int type = -1; + size_t length = 0; + fread(&type, sizeof(int), 1, stream); + fread(&length, sizeof(size_t), 1, stream); + void *_data; + if (type == 1) { + _data = (int *)calloc(length, sizeof(int)); + fread(_data, sizeof(int), length, stream); + } + else if (type == 2) { + _data = (double *)calloc(length, sizeof(double)); + fread(_data, sizeof(double), length, stream); + } + else if (type == 3) { + _data = (char *)calloc(length + 1, sizeof(char)); + fread(_data, sizeof(char), length, stream); + } + else if (type == 4) { + _data = (uint64_t *)calloc(length, sizeof(uint64_t)); + fread(_data, sizeof(uint64_t), length, 
stream); + } + else if (type == 5) { + _data = (size_t *)calloc(length, sizeof(size_t)); + fread(_data, sizeof(size_t), length, stream); + } + data[0] = (void *)_data; + *t = type; + *len = length; +} + +size_t +miqs_skip_field(FILE *stream) +{ + size_t rst = 0; + int type = -1; + size_t length = 0; + fread(&type, sizeof(int), 1, stream); + if (type == EOF) { + return rst; // end of file, nothing to skip + } + rst += sizeof(int); + fread(&length, sizeof(size_t), 1, stream); + rst += sizeof(size_t); + void *_data; + if (type == 1) { + _data = (int *)calloc(length, sizeof(int)); + fread(_data, sizeof(int), length, stream); + rst += sizeof(int) * length; + } + else if (type == 2) { + _data = (double *)calloc(length, sizeof(double)); + fread(_data, sizeof(double), length, stream); + rst += sizeof(double) * length; + } + else if (type == 3) { + _data = (char *)calloc(length + 1, sizeof(char)); + fread(_data, sizeof(char), length, stream); + rst += sizeof(char) * length; + } + else if (type == 4) { + _data = (uint64_t *)calloc(length, sizeof(uint64_t)); + fread(_data, sizeof(uint64_t), length, stream); + rst += sizeof(uint64_t) * length; + } + else if (type == 5) { + _data = (size_t *)calloc(length, sizeof(size_t)); + fread(_data, sizeof(size_t), length, stream); + rst += sizeof(size_t) * length; + } + free(_data); + return rst; +} + +void * +bin_read_index_numeric_value(int *is_float, FILE *file) +{ + int type = 1; + size_t len = 1; + void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (len == 1) { + if (type == 1) { + *is_float = 0; + } + else if (type == 2) { + *is_float = 1; + } + } + return *data; +} + +int * +bin_read_int(FILE *file) +{ + int type = 1; + size_t len = 1; + void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (type == 1 && len == 1) { + return (int *)*data; + } + return NULL; +} + +double * +bin_read_double(FILE *file) +{ + int type = 2; + size_t len = 1; + 
void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (type == 2 && len == 1) { + return (double *)*data; + } + return NULL; +} + +char * +bin_read_string(FILE *file) +{ + int type = 3; + size_t len = 1; + void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (type == 3) { + return (char *)*data; + } + return NULL; +} + +uint64_t * +bin_read_uint64(FILE *file) +{ + int type = 4; + size_t len = 1; + void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (type == 4 && len == 1) { + return (uint64_t *)*data; + } + return NULL; +} + +size_t * +bin_read_size_t(FILE *file) +{ + int type = 5; + size_t len = 1; + void **data = (void **)calloc(1, sizeof(void *)); + bin_read_general(&type, &len, data, file); + if (type == 5 && len == 1) { + return (size_t *)*data; + } + return NULL; +} + +// type: 1, int, 2, float, 3. string, 4. uint64 5. size_t +void +bin_append_type(int type, FILE *stream) +{ + bin_append_int(type, stream); +} diff --git a/src/commons/file/include/bin_file_ops.h b/src/commons/file/include/bin_file_ops.h new file mode 100644 index 000000000..3a6890234 --- /dev/null +++ b/src/commons/file/include/bin_file_ops.h @@ -0,0 +1,39 @@ + +/* File foo. 
*/ +#ifndef BIN_FILE_OPS_H +#define BIN_FILE_OPS_H + +#include +#include +#include +#include + +void bin_append_int(int data, FILE *stream); + +void bin_append_double(double data, FILE *stream); + +void bin_append_string(char *data, FILE *stream); + +void bin_append_string_with_len(char *data, size_t len, FILE *stream); + +void bin_append_uint64(uint64_t data, FILE *stream); + +void bin_append_size_t(size_t data, FILE *stream); + +void bin_append_type(int type, FILE *stream); + +int *bin_read_int(FILE *file); + +double *bin_read_double(FILE *file); + +void *bin_read_index_numeric_value(int *is_float, FILE *file); + +char *bin_read_string(FILE *file); + +uint64_t *bin_read_uint64(FILE *file); + +size_t *bin_read_size_t(FILE *file); + +size_t bin_skip_field(FILE *stream); + +#endif /* !BIN_FILE_OPS_H */ diff --git a/src/commons/generic/include/pdc_generic.h b/src/commons/generic/include/pdc_generic.h index fc9925ea9..86bdd7447 100644 --- a/src/commons/generic/include/pdc_generic.h +++ b/src/commons/generic/include/pdc_generic.h @@ -6,6 +6,7 @@ #include #include #include +#include #ifndef __cplusplus #if __STDC_VERSION__ >= 199901L @@ -18,135 +19,141 @@ typedef enum { false = 0, true = 1 } bool; #endif typedef enum pdc_c_var_type_t { - PDC_UNKNOWN = -1, /* error */ - PDC_INT = 0, /* integer types (identical to int32_t) */ - PDC_FLOAT = 1, /* floating-point types */ - PDC_DOUBLE = 2, /* double types */ - PDC_CHAR = 3, /* character types */ - PDC_STRING = 4, /* string types */ - PDC_BOOLEAN = 5, /* boolean types */ - PDC_SHORT = 6, /* short types */ - PDC_UINT = 7, /* unsigned integer types (identical to uint32_t) */ - PDC_INT64 = 8, /* 64-bit integer types */ - PDC_UINT64 = 9, /* 64-bit unsigned integer types */ - PDC_INT16 = 10, /* 16-bit integer types */ - PDC_INT8 = 11, /* 8-bit integer types */ - PDC_UINT8 = 12, /* 8-bit unsigned integer types */ - PDC_UINT16 = 13, /* 16-bit unsigned integer types */ - PDC_INT32 = 14, /* 32-bit integer types */ - PDC_UINT32 = 15, 
/* 32-bit unsigned integer types */ - PDC_LONG = 16, /* long types */ - PDC_VOID_PTR = 17, /* void pointer type */ - PDC_SIZE_T = 18, /* size_t type */ - PDC_TYPE_COUNT = 19 /* this is the number of var types and has to be the last */ + PDC_UNKNOWN = 0, /* error */ + PDC_SHORT = 1, /* short types */ + PDC_INT = 2, /* integer types (identical to int32_t) */ + PDC_UINT = 3, /* unsigned integer types (identical to uint32_t) */ + PDC_LONG = 4, /* long types */ + PDC_INT8 = 5, /* 8-bit integer types */ + PDC_UINT8 = 6, /* 8-bit unsigned integer types */ + PDC_INT16 = 7, /* 16-bit integer types */ + PDC_UINT16 = 8, /* 16-bit unsigned integer types */ + PDC_INT32 = 9, /* 32-bit integer types, already listed as PDC_INT */ + PDC_UINT32 = 10, /* 32-bit unsigned integer types */ + PDC_INT64 = 11, /* 64-bit integer types */ + PDC_UINT64 = 12, /* 64-bit unsigned integer types */ + PDC_FLOAT = 13, /* floating-point types */ + PDC_DOUBLE = 14, /* double types */ + PDC_CHAR = 15, /* character types */ + PDC_STRING = 16, /* string types */ + PDC_BOOLEAN = 17, /* boolean types */ + PDC_VOID_PTR = 18, /* void pointer type */ + PDC_SIZE_T = 19, /* size_t type */ + PDC_BULKI = 20, /* BULKI type */ + PDC_BULKI_ENT = 21, /* BULKI_ENTITY type */ + PDC_TYPE_COUNT = 22 /* this is the number of var types and has to be the last */ } pdc_c_var_type_t; // typedef pdc_c_var_type_t PDC_CType; typedef enum pdc_c_var_class_t { - PDC_CLS_SCALAR, - PDC_CLS_ARRAY, - PDC_CLS_ENUM, // not implemented, users can use PDC_CT_INT - PDC_CLS_STRUCT, // not implemented, users can use embedded key value pairs for the members in a struct - PDC_CLS_UNION, // not implemented, users can use embedded key value pairs for the only one member value - // in a union. - PDC_CLS_POINTER, // not implemented, users can use PDC_CT_INT64_T to store the pointer address, but - // won't work for distributed memory. 
- PDC_CLS_FUNCTION, // not implemented, users can use PDC_CT_INT64_T to store the function address, but - // won't work for distributed memory. - PDC_CLS_COUNT // just the count of the enum. + PDC_CLS_ITEM = 0, + PDC_CLS_ARRAY = 1, + PDC_CLS_COUNT = 2 // just the count of the enum. } pdc_c_var_class_t; // typedef pdc_c_var_class_t PDC_CType_Class; // clang-format off static size_t DataTypeSizes[PDC_TYPE_COUNT] = { - sizeof(int), - sizeof(float), - sizeof(double), - sizeof(char), - sizeof(char *), - sizeof(bool), - sizeof(short), - sizeof(unsigned int), - sizeof(int64_t), - sizeof(uint64_t), - sizeof(int16_t), - sizeof(int8_t), - sizeof(uint8_t), - sizeof(uint16_t), - sizeof(int32_t), - sizeof(uint32_t), - sizeof(long), - sizeof(void *), - sizeof(size_t) + 0, /* PDC_UNKNOWN, error, size 0 as placeholder */ + sizeof(short), /* PDC_SHORT */ + sizeof(int), /* PDC_INT */ + sizeof(unsigned int), /* PDC_UINT */ + sizeof(long), /* PDC_LONG */ + sizeof(int8_t), /* PDC_INT8 */ + sizeof(uint8_t), /* PDC_UINT8 */ + sizeof(int16_t), /* PDC_INT16 */ + sizeof(uint16_t), /* PDC_UINT16 */ + sizeof(int32_t), /* PDC_INT32, already covered by PDC_INT */ + sizeof(uint32_t), /* PDC_UINT32 */ + sizeof(int64_t), /* PDC_INT64 */ + sizeof(uint64_t), /* PDC_UINT64 */ + sizeof(float), /* PDC_FLOAT */ + sizeof(double), /* PDC_DOUBLE */ + sizeof(char), /* PDC_CHAR */ + sizeof(char *), /* PDC_STRING, assuming pointer to char */ + sizeof(bool), /* PDC_BOOLEAN, assuming C99 _Bool or C++ bool, typically 1 byte */ + sizeof(void *), /* PDC_VOID_PTR */ + sizeof(size_t), /* PDC_SIZE_T */ + sizeof(void *), /* PDC_BULKI, custom type, size 0 as placeholder */ + sizeof(void *), /* PDC_BULKI_ENT, custom type, size 0 as placeholder */ }; static char *DataTypeNames[PDC_TYPE_COUNT] = { - "int", - "float", - "double", - "char", - "char*", - "bool", - "short", - "unsigned int", - "int64_t", - "uint64_t", - "int16_t", - "int8_t", - "uint8_t", - "uint16_t", - "int32_t", - "uint32_t", - "long", - "void*", - 
"size_t" + "Unknown", /* PDC_UNKNOWN */ + "short", /* PDC_SHORT */ + "int", /* PDC_INT */ + "unsigned int", /* PDC_UINT */ + "long", /* PDC_LONG */ + "int8_t", /* PDC_INT8 */ + "uint8_t", /* PDC_UINT8 */ + "int16_t", /* PDC_INT16 */ + "uint16_t", /* PDC_UINT16 */ + "int32_t", /* PDC_INT32, already covered by PDC_INT */ + "uint32_t", /* PDC_UINT32 */ + "int64_t", /* PDC_INT64 */ + "uint64_t", /* PDC_UINT64 */ + "float", /* PDC_FLOAT */ + "double", /* PDC_DOUBLE */ + "char", /* PDC_CHAR */ + "char*", /* PDC_STRING */ + "bool", /* PDC_BOOLEAN */ + "void*", /* PDC_VOID_PTR */ + "size_t", /* PDC_SIZE_T */ + "BULKI", /* PDC_BULKI */ + "BULKI_ENTITY" /* PDC_BULKI_ENT */ }; static char *DataTypeEnumNames[PDC_TYPE_COUNT] = { - "PDC_INT", - "PDC_FLOAT", - "PDC_DOUBLE", - "PDC_CHAR", - "PDC_STRING", - "PDC_BOOLEAN", - "PDC_SHORT", - "PDC_UINT", - "PDC_INT64", - "PDC_UINT64", - "PDC_INT16", - "PDC_INT8", - "PDC_UINT8", - "PDC_UINT16", - "PDC_INT32", - "PDC_UINT32", - "PDC_LONG", - "PDC_VOID_PTR", - "PDC_SIZE_T" + "PDC_UNKNOWN", /* PDC_UNKNOWN */ + "PDC_SHORT", /* PDC_SHORT */ + "PDC_INT", /* PDC_INT */ + "PDC_UINT", /* PDC_UINT */ + "PDC_LONG", /* PDC_LONG */ + "PDC_INT8", /* PDC_INT8 */ + "PDC_UINT8", /* PDC_UINT8 */ + "PDC_INT16", /* PDC_INT16 */ + "PDC_UINT16", /* PDC_UINT16 */ + "PDC_INT32", /* PDC_INT32, already covered by PDC_INT */ + "PDC_UINT32", /* PDC_UINT32 */ + "PDC_INT64", /* PDC_INT64 */ + "PDC_UINT64", /* PDC_UINT64 */ + "PDC_FLOAT", /* PDC_FLOAT */ + "PDC_DOUBLE", /* PDC_DOUBLE */ + "PDC_CHAR", /* PDC_CHAR */ + "PDC_STRING", /* PDC_STRING */ + "PDC_BOOLEAN", /* PDC_BOOLEAN */ + "PDC_VOID_PTR", /* PDC_VOID_PTR */ + "PDC_SIZE_T", /* PDC_SIZE_T */ + "PDC_BULKI", /* PDC_BULKI */ + "PDC_BULKI_ENT" /* PDC_BULKI_ENT */ }; __attribute__((unused)) static char *DataTypeFormat[PDC_TYPE_COUNT] = { - "%d", // int - "%f", // float - "%lf", // double - "%c", // char - "%s", // char* - "%d", // bool (represented as an integer) - "%hd", // short - "%u", // unsigned int - 
"%lld", // int64_t - "%llu", // uint64_t - "%hd", // int16_t - "%hhd", // int8_t - "%hhu", // uint8_t - "%hu", // uint16_t - "%d", // int32_t - "%u", // uint32_t - "%ld", // long - "%p", // void* (pointer) - "%zu" // size_t + "", /* PDC_UNKNOWN, no format as it's an error/unknown type */ + "%hd", /* PDC_SHORT */ + "%d", /* PDC_INT */ + "%u", /* PDC_UINT */ + "%ld", /* PDC_LONG */ + "%" PRId8, /* PDC_INT8 */ + "%" PRIu8, /* PDC_UINT8 */ + "%" PRId16, /* PDC_INT16 */ + "%" PRIu16, /* PDC_UINT16 */ + "%d", /* PDC_INT32, already covered by PDC_INT */ + "%u", /* PDC_UINT32 */ + "%" PRId64, /* PDC_INT64 */ + "%" PRIu64, /* PDC_UINT64 */ + "%f", /* PDC_FLOAT */ + "%lf", /* PDC_DOUBLE */ + "%c", /* PDC_CHAR */ + "%s", /* PDC_STRING */ + "%d", /* PDC_BOOLEAN, represented as an integer */ + "%p", /* PDC_VOID_PTR */ + "%zu", /* PDC_SIZE_T */ + "%p", /* PDC_BULKI, assuming pointer or similar for custom type */ + "%p" /* PDC_BULKI_ENT, assuming pointer or similar for custom type */ }; // clang-format on @@ -171,7 +178,7 @@ get_size_by_class_n_type(void *data, size_t item_count, pdc_c_var_class_t pdc_cl pdc_c_var_type_t pdc_type) { size_t size = 0; - if (pdc_class == PDC_CLS_SCALAR) { + if (pdc_class == PDC_CLS_ITEM) { if (pdc_type == PDC_STRING) { size = (strlen((char *)data) + 1) * sizeof(char); } @@ -212,4 +219,69 @@ get_dtype_by_enum_name(const char *enumName) return PDC_UNKNOWN; // assuming PDC_UNKNOWN is the enum value for "unknown" } +__attribute__((unused)) static char * +get_format_by_dtype(pdc_c_var_type_t type) +{ + if (type < 0 || type >= PDC_TYPE_COUNT) { + return NULL; + } + return DataTypeFormat[type]; +} + +__attribute__((unused)) static bool +is_PDC_UINT(pdc_c_var_type_t type) +{ + if (type == PDC_UINT || type == PDC_UINT64 || type == PDC_UINT16 || type == PDC_UINT8 || + type == PDC_UINT32 || type == PDC_SIZE_T) { + return true; + } + return false; +} + +__attribute__((unused)) static bool +is_PDC_INT(pdc_c_var_type_t type) +{ + if (type == PDC_INT || type == 
PDC_INT64 || type == PDC_INT16 || type == PDC_INT8 || type == PDC_INT32 || + type == PDC_LONG) { + return true; + } + return false; +} + +__attribute__((unused)) static bool +is_PDC_FLOAT(pdc_c_var_type_t type) +{ + if (type == PDC_FLOAT || type == PDC_DOUBLE) { + return true; + } + return false; +} + +__attribute__((unused)) static bool +is_PDC_STRING(pdc_c_var_type_t type) +{ + if (type == PDC_CHAR || type == PDC_STRING) { + return true; + } + return false; +} + +__attribute__((unused)) static bool +is_PDC_NUMERIC(pdc_c_var_type_t type) +{ + if (is_PDC_INT(type) || is_PDC_UINT(type) || is_PDC_FLOAT(type)) { + return true; + } + return false; +} + +/** + * get numeric value from a string. + * @param str + * @param type + * @param val_ptr + * @return the size of the value. + */ +size_t get_number_from_string(char *str, pdc_c_var_type_t type, void **val_ptr); + #endif /* PDC_GENERIC_H */ diff --git a/src/commons/generic/include/pdc_generic_old.h b/src/commons/generic/include/pdc_generic_old.h new file mode 100644 index 000000000..026f9961b --- /dev/null +++ b/src/commons/generic/include/pdc_generic_old.h @@ -0,0 +1,264 @@ +#ifndef PDC_GENERIC_H +#define PDC_GENERIC_H + +#include +#include +#include +#include +#include + +#ifndef __cplusplus +#if __STDC_VERSION__ >= 199901L +/* C99 or later */ +#include +#else +/* Pre-C99 */ +typedef enum { false = 0, true = 1 } bool; +#endif +#endif + +typedef enum pdc_c_var_type_t { + PDC_UNKNOWN = -1, /* error */ + PDC_INT = 0, /* integer types (identical to int32_t) */ + PDC_FLOAT = 1, /* floating-point types */ + PDC_DOUBLE = 2, /* double types */ + PDC_CHAR = 3, /* character types */ + PDC_STRING = 4, /* string types */ + PDC_BOOLEAN = 5, /* boolean types */ + PDC_SHORT = 6, /* short types */ + PDC_UINT = 7, /* unsigned integer types (identical to uint32_t) */ + PDC_INT64 = 8, /* 64-bit integer types */ + PDC_UINT64 = 9, /* 64-bit unsigned integer types */ + PDC_INT16 = 10, /* 16-bit integer types */ + PDC_INT8 = 11, /* 8-bit 
integer types */ + PDC_UINT8 = 12, /* 8-bit unsigned integer types */ + PDC_UINT16 = 13, /* 16-bit unsigned integer types */ + PDC_INT32 = 14, /* 32-bit integer types */ + PDC_UINT32 = 15, /* 32-bit unsigned integer types */ + PDC_LONG = 16, /* long types */ + PDC_VOID_PTR = 17, /* void pointer type */ + PDC_SIZE_T = 18, /* size_t type */ + PDC_BULKI = 19, /* BULKI type */ + PDC_BULKI_ENT = 20, /* BULKI_ENTITY type */ + PDC_TYPE_COUNT = 21 /* this is the number of var types and has to be the last */ +} pdc_c_var_type_t; + +// typedef pdc_c_var_type_t PDC_CType; + +typedef enum pdc_c_var_class_t { + PDC_CLS_ITEM = 0, + PDC_CLS_ARRAY = 1, + PDC_CLS_COUNT = 2 // just the count of the enum. +} pdc_c_var_class_t; + +// typedef pdc_c_var_class_t PDC_CType_Class; + +// clang-format off +static size_t DataTypeSizes[PDC_TYPE_COUNT] = { + sizeof(int), + sizeof(float), + sizeof(double), + sizeof(char), + sizeof(char *), + sizeof(bool), + sizeof(short), + sizeof(unsigned int), + sizeof(int64_t), + sizeof(uint64_t), + sizeof(int16_t), + sizeof(int8_t), + sizeof(uint8_t), + sizeof(uint16_t), + sizeof(int32_t), + sizeof(uint32_t), + sizeof(long), + sizeof(void *), + sizeof(size_t), + sizeof(void *), + sizeof(void *) +}; + +static char *DataTypeNames[PDC_TYPE_COUNT] = { + "int", + "float", + "double", + "char", + "char*", + "bool", + "short", + "unsigned int", + "int64_t", + "uint64_t", + "int16_t", + "int8_t", + "uint8_t", + "uint16_t", + "int32_t", + "uint32_t", + "long", + "void*", + "size_t", + "BULKI", + "BULKI_ENTITY" +}; + +static char *DataTypeEnumNames[PDC_TYPE_COUNT] = { + "PDC_INT", + "PDC_FLOAT", + "PDC_DOUBLE", + "PDC_CHAR", + "PDC_STRING", + "PDC_BOOLEAN", + "PDC_SHORT", + "PDC_UINT", + "PDC_INT64", + "PDC_UINT64", + "PDC_INT16", + "PDC_INT8", + "PDC_UINT8", + "PDC_UINT16", + "PDC_INT32", + "PDC_UINT32", + "PDC_LONG", + "PDC_VOID_PTR", + "PDC_SIZE_T", + "PDC_BULKI", + "PDC_BULKI_ENT" +}; + +__attribute__((unused)) +static char *DataTypeFormat[PDC_TYPE_COUNT] = { + 
"%d", // int + "%f", // float + "%lf", // double + "%c", // char + "%s", // char* + "%d", // bool (represented as an integer) + "%hd", // short + "%u", // unsigned int + "%lld", // int64_t + "%llu", // uint64_t + "%hd", // int16_t + "%hhd", // int8_t + "%hhu", // uint8_t + "%hu", // uint16_t + "%d", // int32_t + "%u", // uint32_t + "%ld", // long + "%p", // void* (pointer) + "%zu", // size_t + "%p", // BULKI + "%p" // BULKI_ENTITY +}; + +// clang-format on +__attribute__((unused)) static char * +get_enum_name_by_dtype(pdc_c_var_type_t type) +{ + if (type < 0 || type >= PDC_TYPE_COUNT) { + return NULL; + } + return DataTypeEnumNames[type]; +} +__attribute__((unused)) static size_t +get_size_by_dtype(pdc_c_var_type_t type) +{ + if (type < 0 || type >= PDC_TYPE_COUNT) { + return 0; + } + return DataTypeSizes[type]; +} +__attribute__((unused)) static size_t +get_size_by_class_n_type(void *data, size_t item_count, pdc_c_var_class_t pdc_class, + pdc_c_var_type_t pdc_type) +{ + size_t size = 0; + if (pdc_class == PDC_CLS_ITEM) { + if (pdc_type == PDC_STRING) { + size = (strlen((char *)data) + 1) * sizeof(char); + } + else { + size = get_size_by_dtype(pdc_type); + } + } + else if (pdc_class == PDC_CLS_ARRAY) { + if (pdc_type == PDC_STRING) { + char **str_arr = (char **)data; + size_t i = 0; + for (i = 0; i < item_count; i++) { + size = size + (strlen(str_arr[i]) + 1) * sizeof(char); + } + } + else { + size = item_count * get_size_by_dtype(pdc_type); + } + } + return size; +} +__attribute__((unused)) static char * +get_name_by_dtype(pdc_c_var_type_t type) +{ + if (type < 0 || type >= PDC_TYPE_COUNT) { + return NULL; + } + return DataTypeNames[type]; +} +__attribute__((unused)) static pdc_c_var_type_t +get_dtype_by_enum_name(const char *enumName) +{ + for (int i = 0; i < PDC_TYPE_COUNT; i++) { + if (strcmp(DataTypeEnumNames[i], enumName) == 0) { + return (pdc_c_var_type_t)i; + } + } + return PDC_UNKNOWN; // assuming PDC_UNKNOWN is the enum value for "unknown" +} + 
+__attribute__((unused)) static pdc_c_var_type_t +is_PDC_UINT(pdc_c_var_type_t type) +{ + if (type == PDC_UINT || type == PDC_UINT64 || type == PDC_UINT16 || type == PDC_UINT8 || + type == PDC_UINT32 || type == PDC_SIZE_T) { + return true; + } + return false; +} + +__attribute__((unused)) static pdc_c_var_type_t +is_PDC_INT(pdc_c_var_type_t type) +{ + if (type == PDC_INT || type == PDC_INT64 || type == PDC_INT16 || type == PDC_INT8 || type == PDC_INT32 || + type == PDC_LONG) { + return true; + } + return false; +} + +__attribute__((unused)) static pdc_c_var_type_t +is_PDC_FLOAT(pdc_c_var_type_t type) +{ + if (type == PDC_FLOAT || type == PDC_DOUBLE) { + return true; + } + return false; +} + +__attribute__((unused)) static pdc_c_var_type_t +is_PDC_STRING(pdc_c_var_type_t type) +{ + if (type == PDC_CHAR || type == PDC_STRING) { + return true; + } + return false; +} + +__attribute__((unused)) static pdc_c_var_type_t +is_PDC_NUMERIC(pdc_c_var_type_t type) +{ + if (is_PDC_INT(type) || is_PDC_UINT(type) || is_PDC_FLOAT(type)) { + return true; + } + return false; +} + +#endif /* PDC_GENERIC_H */ diff --git a/src/commons/generic/pdc_generic.c b/src/commons/generic/pdc_generic.c new file mode 100644 index 000000000..198449769 --- /dev/null +++ b/src/commons/generic/pdc_generic.c @@ -0,0 +1,61 @@ +#include "pdc_generic.h" + +size_t +get_number_from_string(char *str, pdc_c_var_type_t type, void **val_ptr) +{ + if (val_ptr == NULL) { + return 0; + } + + void * k = NULL; + size_t key_len = get_size_by_dtype(type); + + k = malloc(key_len); + + switch (type) { + case PDC_SHORT: + *((short *)k) = (short)strtol(str, NULL, 10); + break; + case PDC_INT: + case PDC_INT32: + *((int *)k) = (int)strtol(str, NULL, 10); + break; + case PDC_UINT: + case PDC_UINT32: + *((unsigned int *)k) = (unsigned int)strtoul(str, NULL, 10); + break; + case PDC_LONG: + *((long *)k) = strtol(str, NULL, 10); + break; + case PDC_INT8: + *((int8_t *)k) = (int8_t)strtol(str, NULL, 10); + break; + case 
PDC_UINT8: + *((uint8_t *)k) = (uint8_t)strtoul(str, NULL, 10); + break; + case PDC_INT16: + *((int16_t *)k) = (int16_t)strtol(str, NULL, 10); + break; + case PDC_UINT16: + *((uint16_t *)k) = (uint16_t)strtoul(str, NULL, 10); + break; + case PDC_INT64: + *((int64_t *)k) = strtoll(str, NULL, 10); + break; + case PDC_UINT64: + *((uint64_t *)k) = strtoull(str, NULL, 10); + break; + case PDC_FLOAT: + *((float *)k) = strtof(str, NULL); + break; + case PDC_DOUBLE: + *((double *)k) = strtod(str, NULL); + break; + default: + free(k); + return 0; + } + + *val_ptr = k; + return key_len; +} \ No newline at end of file diff --git a/src/commons/index/dart/dart_core.c b/src/commons/index/dart/dart_core.c index 43410f4b5..2c75b1025 100644 --- a/src/commons/index/dart/dart_core.c +++ b/src/commons/index/dart/dart_core.c @@ -3,6 +3,24 @@ #include "dart_math.h" #include "dart_core.h" +#ifdef PDC_DART_MAX_SERVER_NUM_TO_ADAPT +#define DART_MAX_SERVER_NUM_TO_ADAPT PDC_DART_MAX_SERVER_NUM_TO_ADAPT +#else +#define DART_MAX_SERVER_NUM_TO_ADAPT 8192 +#endif + +#ifdef PDC_DART_ALPHABET_SIZE +#define DART_ALPHABET_SIZE PDC_DART_ALPHABET_SIZE +#else +#define DART_ALPHABET_SIZE 27 +#endif + +#ifdef PDC_DART_REPLICATION_FACTOR +#define DART_REPLICATION_FACTOR PDC_DART_REPLICATION_FACTOR +#else +#define DART_REPLICATION_FACTOR 3 +#endif + threadpool dart_thpool_g; threadpool @@ -24,26 +42,75 @@ is_index_write_op(dart_op_type_t op_type) } void -dart_space_init(DART *dart, int num_client, int num_server, int alphabet_size, int extra_tree_height, - int replication_factor) +dart_space_init(DART *dart, int num_server) +{ + __dart_space_init(dart, num_server, DART_ALPHABET_SIZE, 0, DART_REPLICATION_FACTOR, + DART_MAX_SERVER_NUM_TO_ADAPT); +} + +void +__dart_space_init(DART *dart, int num_server, int alphabet_size, int extra_tree_height, + int replication_factor, int max_server_num_to_adapt) { if (dart == NULL) { dart = (DART *)calloc(1, sizeof(DART)); } dart->alphabet_size = alphabet_size; - // 
initialize clients; - dart->num_client = num_client; // initialize servers; dart->num_server = num_server; - dart->dart_tree_height = (int)ceil(log_with_base((double)dart->alphabet_size, (double)dart->num_server)) + - 1 + extra_tree_height; + double physical_node_num = + max_server_num_to_adapt == 0 ? (double)num_server : (double)max_server_num_to_adapt; + + dart->dart_tree_height = + (int)ceil(log_with_base((double)dart->alphabet_size, physical_node_num)) + 1 + extra_tree_height; // calculate number of all leaf nodes dart->num_vnode = (uint64_t)pow(dart->alphabet_size, dart->dart_tree_height); dart->replication_factor = replication_factor; - dart_thpool_g = thpool_init(num_server); - dart->suffix_tree_mode = 1; + // dart_thpool_g = thpool_init(num_server); +} + +void +dart_determine_query_token_by_key_query(char *k_query, char **out_token, dart_op_type_t *out_op_type) +{ + if (out_token == NULL || out_op_type == NULL) { + return; + } + char *affix = NULL; + + pattern_type_t dart_query_type = determine_pattern_type(k_query); + switch (dart_query_type) { + case PATTERN_EXACT: + *out_token = strdup(k_query); + *out_op_type = OP_EXACT_QUERY; + break; + case PATTERN_PREFIX: + affix = subrstr(k_query, strlen(k_query) - 1); + *out_token = strdup(affix); + *out_op_type = OP_PREFIX_QUERY; + break; + case PATTERN_SUFFIX: + affix = substr(k_query, 1); +#ifndef PDC_DART_SFX_TREE + *out_token = reverse_str(affix); +#else + *out_token = strdup(affix); +#endif + *out_op_type = OP_SUFFIX_QUERY; + break; + case PATTERN_MIDDLE: + affix = substring(k_query, 1, strlen(k_query) - 1); + *out_token = strdup(affix); + *out_op_type = OP_INFIX_QUERY; + break; + default: + break; + } + if (affix != NULL) { + free(affix); + } } + /** * A utility function for dummies. * Get server id by given virtual node id. 
@@ -56,6 +123,27 @@ get_server_id_by_vnode_id(DART *dart, uint64_t vnode_id) return (vnode_id / num_vnode_per_server) % dart->num_server; } +size_t +get_vnode_ids_by_serverID(DART *dart, uint32_t serverID, uint64_t **out) +{ + if (out == NULL) { + return 0; + } + size_t num_result = 0; + uint64_t *temp_out = (uint64_t *)calloc(dart->num_vnode, sizeof(uint64_t)); + int vid = 0; + for (vid = 0; vid < dart->num_vnode; vid++) { + if (get_server_id_by_vnode_id(dart, vid) == serverID) { + temp_out[vid] = vid; + num_result++; + } + } + out[0] = (uint64_t *)calloc(num_result, sizeof(uint64_t)); + memcpy(out[0], temp_out, num_result * sizeof(uint64_t)); + free(temp_out); + return num_result; +} + /** * This function make the client request counter increment by 1. * @@ -95,150 +183,152 @@ get_base_virtual_node_id_by_string(DART *dart, char *str) return (rst % (uint64_t)dart->num_vnode); } -/** - * This function is for getting the alternative virtual node ID. - * - */ -uint64_t -get_reconciled_vnode_id_with_power_of_two_choice_rehashing(DART *dart, uint64_t base_vnode_idx, char *word, - get_server_info_callback get_server_cb) -{ - - int ir_idx = (int)ceil((double)(dart->alphabet_size / 2)); - - // base virtual node address always in the first element of the array. - uint64_t rst = base_vnode_idx; - - // determine the tree height. - int tree_height = dart->dart_tree_height; - - // get serverID for base virtual node - uint64_t serverId = get_server_id_by_vnode_id(dart, base_vnode_idx); - - // we first let reconciled virtual node to be the base virtual node. - uint64_t reconciled_vnode_idx = base_vnode_idx; - - if (dart->dart_tree_height <= 1) { - return reconciled_vnode_idx; - } - - // The procedure of picking alternative virtual node is important. - // We first need to know what is the character lying on the leaves of DART partition tree. 
- int last_c_index = tree_height - 1; - - int post_leaf_index = 0; - int pre_leaf_index = 0; - - // The pre_leaf_index is the index of character right before the leaf character. - // The post_leaf_index is the index of character right after the leaf character. - if (strlen(word) <= tree_height) { - // if the word is not longer than the tree height, then there is no post-leaf character, therefore - // post_leaf_index should be 0 - post_leaf_index = 0; - // then the last_c_index should be the index of the last character in the alphabet. - last_c_index = strlen(word) - 1; - if (strlen(word) <= 1) { - // if the word contains 0-1 character, there is no pre-leaf character and therefore pre_leaf_index - // should be 0. - pre_leaf_index = 0; - } - else { - // otherwise, pre_leaf_index should be the index of proceeding character of the - // leaf-level character in the alphabet. - pre_leaf_index = (int)word[last_c_index - 1] % dart->alphabet_size; - } - } - else { - // if the length of the word exceeds the height of the DART partition tree, - // definitely, post-leaf character exists. - post_leaf_index = (int)word[last_c_index + 1] % dart->alphabet_size; - // but, there is a case where DART partition tree is of height 1. - // in this case, there will be no pre-leaf character. - if (tree_height <= 1) { - pre_leaf_index = 0; - } - else { - // otherwise, there will be a pre-leaf character. 
- pre_leaf_index = (int)word[last_c_index - 1] % dart->alphabet_size; - } - } - int leaf_index = (int)word[last_c_index] % dart->alphabet_size; - - int leaf_post_sum = leaf_index + pre_leaf_index + post_leaf_index; - int leaf_post_diff = abs(post_leaf_index - leaf_index - pre_leaf_index); - - // int leaf_post_sum = leaf_index + pre_leaf_index + 0; - // int leaf_post_diff = abs(leaf_index-pre_leaf_index); - - // We calculate the region size: - int region_size = dart->num_vnode / dart->alphabet_size; // d=1, rs = 1; d = 2, rs = k; d = 3, rs =k^2; - // We calculate the sub-region size: - int sub_region_size = region_size / dart->alphabet_size; // d=1, srs = 0; d = 2, srs = 1; d = 3, srs = k; - - // We calculate the major offset which possibly pick a virtual node in another sub-region. - int major_offset = (leaf_post_sum % dart->alphabet_size) * (sub_region_size); - // We calcuate the minor offset which will possibly pick a different virtual node within the same - // sub-region. - int minor_offset = leaf_post_diff; - // Finally the region offset will be some certain virtual node in one region. 
- // uint64_t region_offset = (reconciled_vnode_idx + (uint64_t)major_offset - (uint64_t)minor_offset) - // % (uint64_t)region_size; - uint64_t region_offset = - (reconciled_vnode_idx + (uint64_t)major_offset - (uint64_t)minor_offset) % (uint64_t)region_size; - // Invert region Index: ceil(alphabet_size / 2); - - int n = 0; - uint64_t c; - // uint64_t rst = 0; - uint64_t i_t_n; - int met_end = 0; - for (n = 1; n <= dart->dart_tree_height; n++) { - if (word[n - 1] == '\0') { - met_end = 1; - } - if (word[n - 1] != '\0' && met_end == 0) { - if (n == 1) { - i_t_n = ((int)word[n - 1] + ir_idx) % dart->alphabet_size; - } - else if (n == (dart->dart_tree_height - 1)) { - i_t_n = ((int)word[n - 1] + leaf_post_sum) % dart->alphabet_size; - } - else if (n == dart->dart_tree_height) { - i_t_n = abs((int)word[n - 1] - leaf_post_diff) % dart->alphabet_size; - } - } - c = (i_t_n) * ((uint64_t)uint32_pow(dart->alphabet_size, dart->dart_tree_height - n)); - rst += c; - } - - int alterV = (rst % (uint64_t)dart->num_vnode); - - // // We also calculate the region start position. - // uint64_t region_start = ((((int)word[0]+ir_idx) % dart->alphabet_size)) * region_size;// - // ((reconciled_vnode_idx)/region_size) * (region_size); - // // Finally, the reconciled vnode index is calculated. - // // reconciled_vnode_idx = (0 + region_start + region_offset) % dart->num_vnode; - // reconciled_vnode_idx = (reconciled_vnode_idx + region_start + region_offset) % dart->num_vnode; - - // Only when inserting a word, we do such load detection. - // get alternative virtual node and therefore the alternative server ID. - int reconcile_serverId = get_server_id_by_vnode_id(dart, alterV); - if (get_server_cb != NULL) { - // Check both physical server to see which one has smaller number of indexed keywords on it. 
- dart_server origin_server = get_server_cb(serverId); - dart_server reconciled_server = get_server_cb(reconcile_serverId); - // printf("For keyword %s, choosing between %d and %d\n", word, serverId, reconcile_serverId); - - if (origin_server.indexed_word_count > reconciled_server.indexed_word_count) { - // printf("Reconcile happened. from %d to %d\n", vnode_idx , reconciled_vnode_idx); - rst = alterV; - } - } - else { - rst = alterV; - } - return rst; -} +// /** +// * This function is for getting the alternative virtual node ID. +// * +// */ +// uint64_t +// get_reconciled_vnode_id_with_power_of_two_choice_rehashing(DART *dart, uint64_t base_vnode_idx, char *word, +// get_server_info_callback get_server_cb) +// { + +// int ir_idx = (int)ceil((double)(dart->alphabet_size / 2)); + +// // base virtual node address always in the first element of the array. +// uint64_t rst = base_vnode_idx; + +// // determine the tree height. +// int tree_height = dart->dart_tree_height; + +// // get serverID for base virtual node +// uint64_t serverId = get_server_id_by_vnode_id(dart, base_vnode_idx); + +// // we first let reconciled virtual node to be the base virtual node. +// uint64_t reconciled_vnode_idx = base_vnode_idx; + +// if (dart->dart_tree_height <= 1) { +// return reconciled_vnode_idx; +// } + +// // The procedure of picking alternative virtual node is important. +// // We first need to know what is the character lying on the leaves of DART partition tree. +// int last_c_index = tree_height - 1; + +// int post_leaf_index = 0; +// int pre_leaf_index = 0; + +// // The pre_leaf_index is the index of character right before the leaf character. +// // The post_leaf_index is the index of character right after the leaf character. 
+// if (strlen(word) <= tree_height) { +// // if the word is not longer than the tree height, then there is no post-leaf character, therefore +// // post_leaf_index should be 0 +// post_leaf_index = 0; +// // then the last_c_index should be the index of the last character in the alphabet. +// last_c_index = strlen(word) - 1; +// if (strlen(word) <= 1) { +// // if the word contains 0-1 character, there is no pre-leaf character and therefore +// pre_leaf_index +// // should be 0. +// pre_leaf_index = 0; +// } +// else { +// // otherwise, pre_leaf_index should be the index of proceeding character of the +// // leaf-level character in the alphabet. +// pre_leaf_index = (int)word[last_c_index - 1] % dart->alphabet_size; +// } +// } +// else { +// // if the length of the word exceeds the height of the DART partition tree, +// // definitely, post-leaf character exists. +// post_leaf_index = (int)word[last_c_index + 1] % dart->alphabet_size; +// // but, there is a case where DART partition tree is of height 1. +// // in this case, there will be no pre-leaf character. +// if (tree_height <= 1) { +// pre_leaf_index = 0; +// } +// else { +// // otherwise, there will be a pre-leaf character. 
+// pre_leaf_index = (int)word[last_c_index - 1] % dart->alphabet_size; +// } +// } +// int leaf_index = (int)word[last_c_index] % dart->alphabet_size; + +// int leaf_post_sum = leaf_index + pre_leaf_index + post_leaf_index; +// int leaf_post_diff = abs(post_leaf_index - leaf_index - pre_leaf_index); + +// // int leaf_post_sum = leaf_index + pre_leaf_index + 0; +// // int leaf_post_diff = abs(leaf_index-pre_leaf_index); + +// // We calculate the region size: +// int region_size = dart->num_vnode / dart->alphabet_size; // d=1, rs = 1; d = 2, rs = k; d = 3, rs =k^2; +// // We calculate the sub-region size: +// int sub_region_size = region_size / dart->alphabet_size; // d=1, srs = 0; d = 2, srs = 1; d = 3, srs = +// k; + +// // We calculate the major offset which possibly pick a virtual node in another sub-region. +// int major_offset = (leaf_post_sum % dart->alphabet_size) * (sub_region_size); +// // We calcuate the minor offset which will possibly pick a different virtual node within the same +// // sub-region. +// int minor_offset = leaf_post_diff; +// // Finally the region offset will be some certain virtual node in one region. 
+// // uint64_t region_offset = (reconciled_vnode_idx + (uint64_t)major_offset - (uint64_t)minor_offset) +// // % (uint64_t)region_size; +// uint64_t region_offset = +// (reconciled_vnode_idx + (uint64_t)major_offset - (uint64_t)minor_offset) % (uint64_t)region_size; +// // Invert region Index: ceil(alphabet_size / 2); + +// int n = 0; +// uint64_t c; +// // uint64_t rst = 0; +// uint64_t i_t_n; +// int met_end = 0; +// for (n = 1; n <= dart->dart_tree_height; n++) { +// if (word[n - 1] == '\0') { +// met_end = 1; +// } +// if (word[n - 1] != '\0' && met_end == 0) { +// if (n == 1) { +// i_t_n = ((int)word[n - 1] + ir_idx) % dart->alphabet_size; +// } +// else if (n == (dart->dart_tree_height - 1)) { +// i_t_n = ((int)word[n - 1] + leaf_post_sum) % dart->alphabet_size; +// } +// else if (n == dart->dart_tree_height) { +// i_t_n = abs((int)word[n - 1] - leaf_post_diff) % dart->alphabet_size; +// } +// } +// c = (i_t_n) * ((uint64_t)uint32_pow(dart->alphabet_size, dart->dart_tree_height - n)); +// rst += c; +// } + +// int alterV = (rst % (uint64_t)dart->num_vnode); + +// // // We also calculate the region start position. +// // uint64_t region_start = ((((int)word[0]+ir_idx) % dart->alphabet_size)) * region_size;// +// // ((reconciled_vnode_idx)/region_size) * (region_size); +// // // Finally, the reconciled vnode index is calculated. +// // // reconciled_vnode_idx = (0 + region_start + region_offset) % dart->num_vnode; +// // reconciled_vnode_idx = (reconciled_vnode_idx + region_start + region_offset) % dart->num_vnode; + +// // Only when inserting a word, we do such load detection. +// // get alternative virtual node and therefore the alternative server ID. +// int reconcile_serverId = get_server_id_by_vnode_id(dart, alterV); +// if (get_server_cb != NULL) { +// // Check both physical server to see which one has smaller number of indexed keywords on it. 
+// dart_server origin_server = get_server_cb(serverId); +// dart_server reconciled_server = get_server_cb(reconcile_serverId); +// // printf("For keyword %s, choosing between %d and %d\n", word, serverId, reconcile_serverId); + +// if (origin_server.indexed_word_count > reconciled_server.indexed_word_count) { +// // printf("Reconcile happened. from %d to %d\n", vnode_idx , reconciled_vnode_idx); +// rst = alterV; +// } +// } +// else { +// rst = alterV; +// } +// return rst; +// } /** * This function is for getting the alternative virtual node ID. @@ -342,8 +432,12 @@ get_reconciled_vnode_id_with_power_of_two_choice_rehashing_2(DART *dart, uint64_ int reconcile_serverId = get_server_id_by_vnode_id(dart, reconciled_vnode_idx); if (get_server_cb != NULL) { // Check both physical server to see which one has smaller number of indexed keywords on it. - dart_server origin_server = get_server_cb(serverId); - dart_server reconciled_server = get_server_cb(reconcile_serverId); + dart_server origin_server; + origin_server.id = serverId; + get_server_cb(&origin_server); + dart_server reconciled_server; + reconciled_server.id = reconcile_serverId; + get_server_cb(&reconciled_server); // printf("For keyword %s, choosing between %d and %d\n", word, serverId, reconcile_serverId); if (origin_server.indexed_word_count > reconciled_server.indexed_word_count) { @@ -413,7 +507,7 @@ get_server_ids_for_insert(DART *dart_g, char *keyword, get_server_info_callback uint64_t alter_virtual_node_id = get_reconciled_vnode_id_with_power_of_two_choice_rehashing_2( dart_g, base_virtual_node_id, keyword, get_server_cb); // We call the following function to calculate all the server IDs. 
- int is_physical = 1; + int is_physical = 0; int rst_len = get_replica_node_ids(dart_g, alter_virtual_node_id, is_physical, out); return rst_len; } @@ -454,7 +548,7 @@ get_server_ids_for_query(DART *dart_g, char *token, dart_op_type_t op_type, uint } if (op_type == OP_INSERT) { return 0; - } // For INSERT operation ,we return nothing here. + } // For INSERT operation, we return nothing here. // We first eliminate possibility of INFIX query. // Note: if suffix tree mode is ON, we don't have to search all servers. @@ -523,8 +617,10 @@ get_server_ids_for_query(DART *dart_g, char *token, dart_op_type_t op_type, uint uint64_t *base_replicas; uint64_t *alter_replicas; - int num_base_reps = get_replica_node_ids(dart_g, base_virtual_node_id, 1, &base_replicas); - int num_alter_reps = get_replica_node_ids(dart_g, reconciled_vnode_id, 1, &alter_replicas); + int is_physical = 0; + + int num_base_reps = get_replica_node_ids(dart_g, base_virtual_node_id, is_physical, &base_replicas); + int num_alter_reps = get_replica_node_ids(dart_g, reconciled_vnode_id, is_physical, &alter_replicas); if (op_type == OP_DELETE) { // for delete operations, we need to perform delete on all replicas out[0] = (uint64_t *)calloc(num_base_reps + num_alter_reps, sizeof(uint64_t)); @@ -588,7 +684,8 @@ DART_hash(DART *dart_g, char *key, dart_op_type_t op_type, get_server_info_callb char * tok = NULL; *out = NULL; - // regardless of suffix tree mode, we only need to get the DART hash result for one time. + // regardless of suffix tree mode, we only need to get the DART hash result for one time for query + // operations. 
int loop_count = 1; if (is_index_write_op(op_type)) { #ifdef PDC_DART_SFX_TREE @@ -601,12 +698,18 @@ DART_hash(DART *dart_g, char *key, dart_op_type_t op_type, get_server_info_callb loop_count = 2; #endif } - int iter = 0; + // printf("[DART_hash] key = %s\n", key); + int iter = 0; + int is_suffix = 0; for (iter = 0; iter < loop_count; iter++) { #ifdef PDC_DART_SFX_TREE tok = substring(key, iter, strlen(key)); + // when suffix tree mode is ON, we store suffixes of the key to the suffix trie. + is_suffix = iter > 0 ? 1 : 0; #else - tok = iter == 0 ? strdup(key) : reverse_str(key); + tok = iter > 0 ? reverse_str(key) : strdup(key); + // when suffix tree mode is OFF, we store reversed string to the suffix trie. + is_suffix = iter > 0 ? 1 : 0; #endif /* ************ [START] CORE DART HASH FOR EVERY SINGLE TOKEN ************** */ if (op_type == OP_INSERT) { @@ -627,8 +730,10 @@ DART_hash(DART *dart_g, char *key, dart_op_type_t op_type, get_server_info_callb *out = (index_hash_result_t *)realloc(*out, ret_value * sizeof(index_hash_result_t)); } for (int j = 0; j < tmp_out_len; j++) { - (*out)[ret_value - tmp_out_len + j].server_id = temp_out[j]; + (*out)[ret_value - tmp_out_len + j].virtual_node_id = temp_out[j]; + (*out)[ret_value - tmp_out_len + j].server_id = get_server_id_by_vnode_id(dart_g, temp_out[j]); (*out)[ret_value - tmp_out_len + j].key = tok; + (*out)[ret_value - tmp_out_len + j].is_suffix = is_suffix; } if (temp_out != NULL) free(temp_out); diff --git a/src/commons/index/dart/dart_core_test.c b/src/commons/index/dart/dart_core_test.c index 30f17c7d6..43ac0b7d3 100644 --- a/src/commons/index/dart/dart_core_test.c +++ b/src/commons/index/dart/dart_core_test.c @@ -36,7 +36,7 @@ main(int argc, char *argv[]) all_servers[i].request_count = 0; } - dart_space_init(&dart, num_client, num_server, alphabet_size, extra_tree_height, replication_factor); + __dart_space_init(&dart, num_server, alphabet_size, extra_tree_height, replication_factor, 1024); println( 
"num_server: %d, num_client: %d, alphabet_size: %d, extra_tree_height: %d, replication_factor: %d", diff --git a/src/commons/index/dart/include/dart_core.h b/src/commons/index/dart/include/dart_core.h index fedfbcaa1..a1d211020 100644 --- a/src/commons/index/dart/include/dart_core.h +++ b/src/commons/index/dart/include/dart_core.h @@ -17,7 +17,7 @@ #include "string_utils.h" #include "pdc_config.h" -typedef enum { NUMERIC = 1, TIME = 2, CHAR = 3, BINARY = 4 } dart_indexed_value_type_t; +typedef enum { INTEGER = 1, FLOAT = 2, STRING = 3 } dart_indexed_value_type_t; typedef enum { DHT_FULL_HASH = 1, DHT_INITIAL_HASH = 2, DART_HASH = 3 } dart_hash_algo_t; @@ -27,7 +27,8 @@ typedef enum { OP_PREFIX_QUERY = 3, OP_SUFFIX_QUERY = 4, OP_INFIX_QUERY = 5, - OP_DELETE = 6 + OP_DELETE = 6, + OP_RANGE_QUERY = 7, } dart_op_type_t; typedef enum { REF_PRIMARY_ID = 1, REF_SECONDARY_ID = 2, REF_SERVER_ID = 3 } dart_object_ref_type_t; @@ -43,11 +44,9 @@ typedef struct { int dart_tree_height; int replication_factor; int client_request_count; - uint32_t num_client; uint32_t num_server; uint64_t num_vnode; dart_vnode *vnodes; - int suffix_tree_mode; } DART; typedef struct { @@ -64,21 +63,27 @@ typedef struct { typedef struct { uint32_t server_id; char * key; + uint64_t virtual_node_id; + int8_t is_suffix; // 1: this is a suffix of another key, 0: this is just a normal key. } index_hash_result_t; +void dart_determine_query_token_by_key_query(char *key_query, char **out_token, dart_op_type_t *out_op_type); + // Defining a function pointer by which the server load information can be retrieved. // The returning data type should be dart_server, which is a struct. // The parameter should be a uint32_t. // The function name can be anything. -typedef dart_server (*get_server_info_callback)(uint32_t server_id); +typedef void (*get_server_info_callback)(dart_server *server_ptr); /** * Initialize the DART space. 
* * */ -void dart_space_init(DART *dart, int num_client, int num_server, int alphabet_size, int extra_tree_height, - int replication_factor); +void dart_space_init(DART *dart, int num_server); + +void __dart_space_init(DART *dart, int num_server, int alphabet_size, int extra_tree_height, + int replication_factor, int max_server_num_to_adapt); /** * This function make the client request counter increment by 1. @@ -93,18 +98,26 @@ int dart_client_request_count_incr(DART *dart_g); uint64_t get_server_id_by_vnode_id(DART *dart, uint64_t vnode_id); /** - * This function is for getting the base virtual node ID by a given string. + * This function is for getting the virtual node IDs that a given server ID can be mapped to. * + * The return value is the length of the ID array. */ -uint64_t get_base_virtual_node_id_by_string(DART *dart, char *str); +size_t get_vnode_ids_by_serverID(DART *dart, uint32_t serverID, uint64_t **out); /** - * This function is for getting the alternative virtual node ID. + * This function is for getting the base virtual node ID by a given string. * */ -uint64_t get_reconciled_vnode_id_with_power_of_two_choice_rehashing(DART *dart, uint64_t base_vnode_idx, - char * word, - get_server_info_callback get_server_cb); +uint64_t get_base_virtual_node_id_by_string(DART *dart, char *str); + +// /** +// * This function is for getting the alternative virtual node ID. +// * +// */ +// uint64_t get_reconciled_vnode_id_with_power_of_two_choice_rehashing(DART *dart, uint64_t base_vnode_idx, +// char * word, +// get_server_info_callback +// get_server_cb); /** * Get IDs of all virtual nodes of replicas by given string and overall tree-height setting. 
diff --git a/src/commons/index/dart/index/hashtable/hashtable_impl.c b/src/commons/index/dart/index/hashtable/hashtable_impl.c new file mode 100644 index 000000000..15cbf5e88 --- /dev/null +++ b/src/commons/index/dart/index/hashtable/hashtable_impl.c @@ -0,0 +1,174 @@ + +// void +// create_hash_table_for_keyword(char *keyword, char *value, size_t len, void *data) +// { +// uint32_t hashVal = djb2_hash(keyword, (int)len); +// printf("%d:", hashVal); +// gen_obj_id_in_t in; +// gen_obj_id_out_t out; + +// in.data.obj_name = keyword; +// in.data.time_step = (int32_t)data; +// in.data.user_id = (uint32_t)data; +// char *taglist = (char *)calloc(256, sizeof(char)); +// printf("%s=%s", keyword, value); +// sprintf(taglist, "%s=%s", keyword, value); +// in.data.tags = taglist; +// in.data.data_location = " "; +// in.data.app_name = " "; +// in.data.ndim = 1; +// in.hash_value = hashVal; + +// PDC_insert_metadata_to_hash_table(&in, &out); +// } + +// int +// brutal_force_partial_search(metadata_query_transfer_in_t *in, uint32_t *n_meta, void ***buf_ptrs, +// char *k_query, char *vfrom_query, char *vto_query, uint32_t *hash_value) +// { +// int result = 0; + +// uint32_t iter = 0; +// HashTableIterator hash_table_iter; +// HashTableValue * head = NULL; +// pdc_metadata_t * elt; +// int n_entry; + +// if (metadata_hash_table_g != NULL) { +// if (hash_value != NULL) { +// head = hash_table_lookup(metadata_hash_table_g, hash_value); +// if (head != NULL) { +// DL_FOREACH(head->metadata, elt) +// { +// // List all objects, no need to check other constraints +// if (in->is_list_all == 1) { +// (*buf_ptrs)[iter++] = elt; +// } +// // check if current metadata matches search constraint +// else if (is_metadata_satisfy_constraint(elt, in) == 1) { +// (*buf_ptrs)[iter++] = elt; +// } +// } +// } +// } +// else { +// n_entry = hash_table_num_entries(metadata_hash_table_g); +// hash_table_iterate(metadata_hash_table_g, &hash_table_iter); + +// while (n_entry != 0 && 
hash_table_iter_has_more(&hash_table_iter)) { +// head = hash_table_iter_next(&hash_table_iter); +// DL_FOREACH(head->metadata, elt) +// { +// // List all objects, no need to check other constraints +// if (in->is_list_all == 1) { +// (*buf_ptrs)[iter++] = elt; +// } +// // check if current metadata matches search constraint +// else if (is_metadata_satisfy_constraint(elt, in) == 1) { +// (*buf_ptrs)[iter++] = elt; +// } +// } +// } +// } +// *n_meta = iter; + +// printf("==PDC_SERVER: brutal_force_partial_search: Total matching results: %d\n", *n_meta); +// result = 1; +// } // if (metadata_hash_table_g != NULL) +// else { +// printf("==PDC_SERVER: metadata_hash_table_g not initilized!\n"); +// result = 0; +// } + +// return result; +// } + +// void +// search_through_hash_table(char *k_query, uint32_t index_type, pattern_type_t pattern_type, +// pdc_art_iterator_param_t *param) +// { + +// metadata_query_transfer_in_t in; +// in.is_list_all = -1; +// in.user_id = -1; +// in.app_name = " "; +// in.obj_name = " "; +// in.time_step_from = -1; +// in.time_step_to = -1; +// in.ndim = -1; +// in.tags = " "; +// char * qType_string; +// uint32_t n_meta; +// void ** buf_ptrs; +// char * tok; + +// uint32_t *hash_ptr = NULL; +// uint32_t hash_value = -1; + +// switch (pattern_type) { +// case PATTERN_EXACT: +// qType_string = "Exact"; +// tok = k_query; +// if (index_type == 1) { +// hash_value = djb2_hash(tok, (int)strlen(tok)); +// hash_ptr = &hash_value; +// } +// else if (index_type == 2) { +// hash_value = djb2_hash(tok, 1); +// hash_ptr = &hash_value; +// } +// break; +// case PATTERN_PREFIX: +// qType_string = "Prefix"; +// tok = subrstr(k_query, strlen(k_query) - 1); +// if (index_type == 2) { +// hash_value = djb2_hash(tok, 1); +// hash_ptr = &hash_value; +// } +// else { +// hash_ptr = NULL; +// } +// break; +// case PATTERN_SUFFIX: +// qType_string = "Suffix"; +// tok = substr(k_query, 1); +// tok = reverse_str(tok); +// if (index_type == 2) { +// hash_value = 
djb2_hash(tok, 1); +// hash_ptr = &hash_value; +// } +// else { +// hash_ptr = NULL; +// } +// break; +// case PATTERN_MIDDLE: +// qType_string = "Infix"; +// tok = substring(k_query, 1, strlen(k_query) - 1); +// break; +// default: +// break; +// } + +// int search_rst = brutal_force_partial_search(&in, &n_meta, &buf_ptrs, k_query, NULL, NULL, hash_ptr); +// int i = 0; +// for (i = 0; i < n_meta; i++) { +// pdc_metadata_t *metadata = (pdc_metadata_t *)buf_ptrs[i]; +// hashset_add(param->out, (metadata->user_id)); +// param->total_count = param->total_count + 1; +// } +// } + +// void +// delete_hash_table_for_keyword(char *keyword, size_t len, void *data) +// { +// uint32_t hashVal = djb2_hash(keyword, (int)len); + +// metadata_delete_in_t in; +// metadata_delete_out_t out; + +// in.obj_name = keyword; +// in.time_step = (int32_t)data; +// in.hash_value = hashVal; + +// PDC_delete_metadata_from_hash_table(&in, &out); +// } \ No newline at end of file diff --git a/src/commons/index/dart/index/hashtable/include/hashtable_impl.h b/src/commons/index/dart/index/hashtable/include/hashtable_impl.h new file mode 100644 index 000000000..238273745 --- /dev/null +++ b/src/commons/index/dart/index/hashtable/include/hashtable_impl.h @@ -0,0 +1,25 @@ + +// /** +// * @brief Create the metadata index +// * @param in [IN] Input parameters for the create operation +// * @param out [OUT] Output parameters for the create operation +// * @return perr_t SUCCESS on success, FAIL on failure +// */ +// perr_t PDC_Server_metadata_index_create(metadata_index_create_in_t *in, metadata_index_create_out_t *out); + +// /** +// * @brief Delete the metadata index +// * @param in [IN] Input parameters for the delete operation +// * @param out [OUT] Output parameters for the delete operation +// * @return perr_t SUCCESS on success, FAIL on failure +// */ +// perr_t PDC_Server_metadata_index_delete(metadata_index_delete_in_t *in, metadata_index_delete_out_t *out); + +// /** +// * @brief Search the 
metadata index +// * @param in [IN] Input parameters for the search operation +// * @param out [OUT] Output parameters for the search operation +// * @return perr_t SUCCESS on success, FAIL on failure +// */ +// perr_t PDC_Server_metadata_index_search(metadata_index_search_in_t *in, metadata_index_search_out_t *out, +// uint64_t *n_obj_ids_ptr, uint64_t **buf_ptrs); \ No newline at end of file diff --git a/src/commons/index/dart/index/idioms/idioms_local_index.c b/src/commons/index/dart/index/idioms/idioms_local_index.c new file mode 100644 index 000000000..8f8309652 --- /dev/null +++ b/src/commons/index/dart/index/idioms/idioms_local_index.c @@ -0,0 +1,872 @@ +#include "comparators.h" +#include "rbtree.h" +#include "pdc_set.h" +#include "pdc_hash.h" +#include "idioms_local_index.h" +#include "bin_file_ops.h" +#include "pdc_hash_table.h" +#include "string_utils.h" +#include "query_utils.h" +#include "pdc_logger.h" +#include +#include +#include + +#define DART_SERVER_DEBUG 0 + +#define KV_DELIM '=' + +IDIOMS_t * +IDIOMS_init(uint32_t server_id, uint32_t num_servers) +{ + IDIOMS_t *idioms = (IDIOMS_t *)calloc(1, sizeof(IDIOMS_t)); + idioms->art_key_prefix_tree_g = (art_tree *)calloc(1, sizeof(art_tree)); + art_tree_init(idioms->art_key_prefix_tree_g); + + idioms->art_key_suffix_tree_g = (art_tree *)calloc(1, sizeof(art_tree)); + art_tree_init(idioms->art_key_suffix_tree_g); + + idioms->server_id_g = server_id; + idioms->num_servers_g = num_servers; + + idioms->dart_info_g = (DART *)calloc(1, sizeof(DART)); + _init_dart_space_via_idioms(idioms->dart_info_g, idioms->num_servers_g); + + return idioms; +} + +/****************************/ +/* Index Create */ +/****************************/ + +perr_t +insert_obj_ids_into_value_leaf(void *index, void *attr_val, int is_trie, size_t value_len, uint64_t *obj_ids, + size_t num_obj_ids) +{ + perr_t ret = SUCCEED; + // printf("index is %p, obj_id: %llu\n", index, obj_ids[0]); + if (index == NULL) { + return FAIL; + } + + void 
*entry = NULL; + int idx_found = -1; // -1 not found, 0 found. + if (is_trie) { + entry = art_search((art_tree *)index, (unsigned char *)attr_val, value_len); + idx_found = (entry != NULL) ? 0 : -1; + } + else { + idx_found = rbt_find((rbt_t *)index, attr_val, value_len, &entry); + } + + if (entry == NULL) { // not found + entry = (value_index_leaf_content_t *)PDC_calloc(1, sizeof(value_index_leaf_content_t)); + // create new set for obj_ids + Set *obj_id_set = set_new(ui64_hash, ui64_equal); + set_register_free_function(obj_id_set, free); + + ((value_index_leaf_content_t *)entry)->obj_id_set = obj_id_set; + if (is_trie) { + art_insert((art_tree *)index, (unsigned char *)attr_val, value_len, entry); + } + else { + rbt_add((rbt_t *)index, attr_val, value_len, entry); + } + } + + for (int j = 0; j < num_obj_ids; j++) { + // the set is directly taking the pointer to every entry, which means we have to allocate memory for + // every number stored in the set. + uint64_t *obj_id = (uint64_t *)PDC_calloc(1, sizeof(uint64_t)); + *obj_id = obj_ids[j]; + set_insert(((value_index_leaf_content_t *)entry)->obj_id_set, (SetValue)obj_id); + size_t num_entires = set_num_entries(((value_index_leaf_content_t *)entry)->obj_id_set); + } + return ret; +} + +perr_t +insert_value_into_second_level_index(key_index_leaf_content_t *leaf_content, + IDIOMS_md_idx_record_t * idx_record) +{ + perr_t ret = SUCCEED; + if (leaf_content == NULL) { + return FAIL; + } + char *value_type_str = get_enum_name_by_dtype(idx_record->type); + + if (_getCompoundTypeFromBitmap(leaf_content->val_idx_dtype) == PDC_STRING && + is_PDC_STRING(idx_record->type)) { + void * attr_val = stripQuotes(idx_record->value); + size_t value_str_len = strlen(attr_val); + ret = insert_obj_ids_into_value_leaf(leaf_content->primary_trie, attr_val, + idx_record->type == PDC_STRING, value_str_len, + idx_record->obj_ids, idx_record->num_obj_ids); + if (ret == FAIL) { + return ret; + } +#ifndef PDC_DART_SFX_TREE + void *reverted_val 
= reverse_str((char *)attr_val); + // LOG_DEBUG("reverted_val: %s\n", (char *)reverted_val); + // insert the value into the trie for suffix search. + ret = insert_obj_ids_into_value_leaf(leaf_content->secondary_trie, reverted_val, + idx_record->type == PDC_STRING, value_str_len, + idx_record->obj_ids, idx_record->num_obj_ids); +#else + int sub_loop_count = value_str_len; + for (int j = 1; j < sub_loop_count; j++) { + char *suffix = substring(attr_val, j, value_str_len); + // LOG_DEBUG("suffix: %s\n", suffix); + ret = insert_obj_ids_into_value_leaf(leaf_content->secondary_trie, suffix, + idx_record->type == PDC_STRING, value_str_len - j, + idx_record->obj_ids, idx_record->num_obj_ids); + } +#endif + } + if (_getNumericalTypeFromBitmap(leaf_content->val_idx_dtype) != PDC_UNKNOWN && + is_PDC_NUMERIC(idx_record->type)) { + ret = insert_obj_ids_into_value_leaf((rbt_t *)leaf_content->primary_rbt, idx_record->value, + idx_record->type == PDC_STRING, idx_record->value_len, + idx_record->obj_ids, idx_record->num_obj_ids); + } + return ret; +} + +perr_t +insert_into_key_trie(art_tree *key_trie, char *key, int len, IDIOMS_md_idx_record_t *idx_record) +{ + perr_t ret = SUCCEED; + if (key_trie == NULL) { + return FAIL; + } + + // look up for leaf_content + key_index_leaf_content_t *key_leaf_content = + (key_index_leaf_content_t *)art_search(key_trie, (unsigned char *)key, len); + + // create leaf_content node if not exist. + if (key_leaf_content == NULL) { + // create key_leaf_content node for the key. + key_leaf_content = (key_index_leaf_content_t *)PDC_calloc(1, sizeof(key_index_leaf_content_t)); + // insert the key into the the key trie along with the key_leaf_content. + art_insert(key_trie, (unsigned char *)key, len, (void *)key_leaf_content); + // LOG_DEBUG("Inserted key %s into the key trie\n", key); + } + + // fill the content of the leaf_content node, if necessary. 
+    if (key_leaf_content->primary_trie == NULL && key_leaf_content->secondary_trie == NULL &&
+        is_PDC_STRING(idx_record->type)) {
+        // the following gurarantees that both prefix index and suffix index are initialized.
+        key_leaf_content->primary_trie = (art_tree *)PDC_calloc(1, sizeof(art_tree));
+        art_tree_init((art_tree *)key_leaf_content->primary_trie);
+
+        key_leaf_content->secondary_trie = (art_tree *)PDC_calloc(1, sizeof(art_tree));
+        art_tree_init((art_tree *)key_leaf_content->secondary_trie);
+
+        _encodeTypeToBitmap(&(key_leaf_content->val_idx_dtype), idx_record->type);
+    }
+    if (key_leaf_content->primary_rbt == NULL) {
+        if (is_PDC_UINT(idx_record->type)) {
+            // TODO: This is a simplified implementation, but we need to have all the CMP_CB functions
+            // defined for all numerical types in libhl/comparators.h
+            key_leaf_content->primary_rbt = rbt_create_by_dtype(PDC_UINT64, PDC_free_void);
+            _encodeTypeToBitmap(&(key_leaf_content->val_idx_dtype), PDC_UINT64);
+        }
+        if (is_PDC_INT(idx_record->type)) {
+            key_leaf_content->primary_rbt = rbt_create_by_dtype(PDC_INT64, PDC_free_void);
+            _encodeTypeToBitmap(&(key_leaf_content->val_idx_dtype), PDC_INT64);
+        }
+        if (is_PDC_FLOAT(idx_record->type)) {
+            key_leaf_content->primary_rbt = rbt_create_by_dtype(PDC_DOUBLE, PDC_free_void);
+            _encodeTypeToBitmap(&(key_leaf_content->val_idx_dtype), PDC_DOUBLE);
+        }
+    }
+
+    key_leaf_content->virtural_node_id = idx_record->virtual_node_id;
+
+    // insert the value part into second level index.
+    ret = insert_value_into_second_level_index(key_leaf_content, idx_record);
+    return ret;
+}
+
+/**
+ * Insert one index record into the local key trie of this server.
+ *
+ * The key trie is chosen by idx_record->is_key_suffix (suffix tree when it is 1, prefix tree
+ * otherwise) and the record is handed to insert_into_key_trie(). The elapsed time and the
+ * request/record counters of `idioms` are updated on every call.
+ *
+ * \param idioms     [IN] local index instance; must not be NULL
+ * \param idx_record [IN] record to insert; idx_record->key must not be NULL
+ *
+ * \return the status reported by insert_into_key_trie(), or FAIL on a NULL argument
+ */
+perr_t
+idioms_local_index_create(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record)
+{
+    perr_t ret = SUCCEED;
+    if (idioms == NULL || idx_record == NULL || idx_record->key == NULL) {
+        return FAIL;
+    }
+    // get the key and create key_index_leaf_content node for it.
+    char *key = idx_record->key;
+    int   len = strlen(key);
+
+    stopwatch_t index_timer;
+    timer_start(&index_timer);
+    art_tree *key_trie =
+        (idx_record->is_key_suffix == 1) ? idioms->art_key_suffix_tree_g : idioms->art_key_prefix_tree_g;
+    // keep the insertion status so that a failure is reported to the caller.
+    ret = insert_into_key_trie(key_trie, key, len, idx_record);
+    /**
+     * Note: in IDIOMS, the client-runtime is responsible for iterating all suffixes of the key.
+     * Therefore, there is no need to insert the suffixes of the key into the key trie locally.
+     * Different suffixes of the key should be inserted into the key trie on different servers,
+     * distributed by DART.
+     *
+     * Therefore, the following logic is commented off.
+     */
+    // #ifndef PDC_DART_SFX_TREE
+    //     if (ret == FAIL) {
+    //         return ret;
+    //     }
+    //     ret = insert_into_key_trie(idioms_g->art_key_suffix_tree, reverse_str(key), len, 0,
+    //     idx_record);
+    // #else
+    //     // insert every suffix of the key into the trie;
+    //     int sub_loop_count = len;
+    //     for (int j = 1; j < sub_loop_count; j++) {
+    //         char *suffix = substring(key, j, len);
+    //         // TODO: change delete and search functions for suffix/infix query on the suffix trie.
+    //         ret = insert_into_key_trie(idioms_g->art_key_suffix_tree_g, suffix, strlen(suffix), 1,
+    //         idx_record); if (ret == FAIL) {
+    //             return ret;
+    //         }
+    //     }
+    // #endif
+    timer_pause(&index_timer);
+
+    if (DART_SERVER_DEBUG) {
+        // render the value according to its type before printing: a numeric value is raw bytes and
+        // must never be passed to a %s conversion.
+        char value_str[64];
+        if (idx_record->type == PDC_STRING) {
+            snprintf(value_str, sizeof(value_str), "%s", idx_record->value);
+        }
+        else if (is_PDC_UINT(idx_record->type)) {
+            snprintf(value_str, sizeof(value_str), "%" PRIu64, *((uint64_t *)idx_record->value));
+        }
+        else if (is_PDC_INT(idx_record->type)) {
+            snprintf(value_str, sizeof(value_str), "%" PRId64, *((int64_t *)idx_record->value));
+        }
+        else if (is_PDC_FLOAT(idx_record->type)) {
+            snprintf(value_str, sizeof(value_str), "%f", *((double *)idx_record->value));
+        }
+        else {
+            snprintf(value_str, sizeof(value_str), "[UnknownValue]");
+        }
+        // string values are still printed in full; everything else uses the rendered text.
+        const char *timer_value =
+            (idx_record->type == PDC_STRING) ? (const char *)idx_record->value : value_str;
+        printf("[Server_Side_Insert_%d] Timer to insert a keyword %s : %s into index = %.4f microseconds\n",
+               idioms->server_id_g, key, timer_value, timer_delta_us(&index_timer));
+        // the record may carry no object ID at all; print 0 in that case instead of reading obj_ids[0].
+        uint64_t first_obj_id =
+            (idx_record->obj_ids != NULL && idx_record->num_obj_ids > 0) ? idx_record->obj_ids[0] : 0;
+        printf("[idioms_local_index_create] Client %" PRIu32 " inserted a kvtag \"%s\" : \"%s\" -> %" PRIu64
+               " into Server %" PRIu32 " in %.4f microseconds, insert_request_count_g = %" PRId64
+               ", index_record_count_g = %" PRId64 "\n",
+               idx_record->src_client_id, key, value_str, first_obj_id, idioms->server_id_g,
+               timer_delta_us(&index_timer), idioms->insert_request_count_g, idioms->index_record_count_g);
+    }
+    idioms->time_to_create_index_g += timer_delta_us(&index_timer);
+    idioms->index_record_count_g++;
+    idioms->insert_request_count_g++;
+
+    return ret;
+}
+
+/****************************/
+/* Index Delete */
+/****************************/
+perr_t
+delete_obj_ids_from_value_leaf(void *index, void *attr_val, int is_trie, size_t value_len, uint64_t *obj_ids,
+                               size_t num_obj_ids)
+{
+    perr_t ret = SUCCEED;
+    if (index == NULL) {
+        return ret;
+    }
+
+    void *entry = NULL;
+    int idx_found = -1; // -1 not found, 0 found.
+    if (is_trie) {
+        entry = art_search((art_tree *)index, (unsigned char *)attr_val, value_len);
+        idx_found = (entry != NULL) ? 0 : -1;
+    }
+    else {
+        idx_found = rbt_find((rbt_t *)index, attr_val, value_len, &entry);
+    }
+
+    if (idx_found != 0) { // not found
+        return SUCCEED;
+    }
+
+    uint64_t *obj_id = (uint64_t *)PDC_calloc(1, sizeof(uint64_t));
+    for (int j = 0; j < num_obj_ids; j++) {
+        if (ret == FAIL) {
+            return ret;
+        }
+        // obj_id here is just for comparison purpose, and no need to allocate memory for it every time.
+ *obj_id = obj_ids[j]; + set_remove(((value_index_leaf_content_t *)entry)->obj_id_set, (SetValue)obj_id); + } + + PDC_free(obj_id); + + if (set_num_entries(((value_index_leaf_content_t *)entry)->obj_id_set) == 0) { + set_free(((value_index_leaf_content_t *)entry)->obj_id_set); + if (is_trie) { + art_delete((art_tree *)index, (unsigned char *)attr_val, value_len); + } + else { + rbt_remove((rbt_t *)index, attr_val, value_len, &entry); + } + // PDC_free(entry); + } + return ret; +} + +perr_t +delete_value_from_second_level_index(key_index_leaf_content_t *leaf_content, + IDIOMS_md_idx_record_t * idx_record) +{ + perr_t ret = SUCCEED; + if (leaf_content == NULL) { + return FAIL; + } + char *value_type_str = get_enum_name_by_dtype(idx_record->type); + if (_getCompoundTypeFromBitmap(leaf_content->val_idx_dtype) == PDC_STRING && + is_PDC_STRING(idx_record->type)) { + // delete the value from the prefix tree. + void *attr_val = stripQuotes(idx_record->value); + + size_t value_str_len = strlen(attr_val); + ret = delete_obj_ids_from_value_leaf(leaf_content->primary_trie, attr_val, + idx_record->type == PDC_STRING, value_str_len, + idx_record->obj_ids, idx_record->num_obj_ids); + if (ret == FAIL) { + return ret; + } +#ifndef PDC_DART_SFX_TREE + void *reverted_val = reverse_str((char *)attr_val); + LOG_DEBUG("DEL reverted_val: %s\n", (char *)reverted_val); + // when suffix tree mode is OFF, the secondary trie is used for indexing reversed value strings. + ret = delete_obj_ids_from_value_leaf(leaf_content->secondary_trie, reverted_val, + idx_record->type == PDC_STRING, value_str_len, + idx_record->obj_ids, idx_record->num_obj_ids); +#else + // when suffix tree mode is ON, the secondary trie is used for indexing suffixes of the value + // string. 
+ int sub_loop_count = value_str_len; + for (int j = 1; j < sub_loop_count; j++) { + char *suffix = substring(attr_val, j, value_str_len); + ret = delete_obj_ids_from_value_leaf(leaf_content->secondary_trie, suffix, + idx_record->type == PDC_STRING, value_str_len - j, + idx_record->obj_ids, idx_record->num_obj_ids); + } +#endif + } + if (_getNumericalTypeFromBitmap(leaf_content->val_idx_dtype) != PDC_UNKNOWN && + is_PDC_NUMERIC(idx_record->type)) { + // delete the value from the primary rbtree index. here, value_len is the size of the value in + // bytes. + ret = delete_obj_ids_from_value_leaf(leaf_content->primary_rbt, idx_record->value, + idx_record->type == PDC_STRING, idx_record->value_len, + idx_record->obj_ids, idx_record->num_obj_ids); + } + return ret; +} + +int +is_key_leaf_cnt_empty(key_index_leaf_content_t *leaf_content) +{ + if (leaf_content->primary_trie == NULL && leaf_content->secondary_trie == NULL && + leaf_content->primary_rbt == NULL && leaf_content->secondary_rbt == NULL) { + return 1; + } + return 0; +} + +/** + * @validated + */ +perr_t +delete_from_key_trie(art_tree *key_trie, char *key, int len, IDIOMS_md_idx_record_t *idx_record) +{ + perr_t ret = SUCCEED; + if (key_trie == NULL) { + return FAIL; + } + // look up for leaf_content + key_index_leaf_content_t *key_leaf_content = + (key_index_leaf_content_t *)art_search(key_trie, (unsigned char *)key, len); + // if no corresponding leaf_content, that means the key has been deleted already. + if (key_leaf_content == NULL) { + return SUCCEED; + } + + // delete the value part from second level index. 
+    ret = delete_value_from_second_level_index(key_leaf_content, idx_record);
+    if (ret == FAIL) {
+        return ret;
+    }
+
+    char *value_type_str = get_enum_name_by_dtype(idx_record->type);
+    int count_in_value_index = 0;
+
+    if (is_PDC_NUMERIC(idx_record->type)) {
+        if (rbt_size(key_leaf_content->primary_rbt) == 0) {
+            rbt_destroy(key_leaf_content->primary_rbt);
+            key_leaf_content->primary_rbt = NULL;
+        }
+        if (rbt_size(key_leaf_content->secondary_rbt) == 0) {
+            rbt_destroy(key_leaf_content->secondary_rbt);
+            key_leaf_content->secondary_rbt = NULL;
+        }
+    }
+    if (is_PDC_STRING(idx_record->type)) {
+        if (art_size(key_leaf_content->primary_trie) == 0) {
+            art_tree_destroy(key_leaf_content->primary_trie);
+            key_leaf_content->primary_trie = NULL;
+        }
+        if (art_size(key_leaf_content->secondary_trie) == 0) {
+            art_tree_destroy(key_leaf_content->secondary_trie);
+            key_leaf_content->secondary_trie = NULL;
+        }
+    }
+
+    if (is_key_leaf_cnt_empty(key_leaf_content)) {
+        // delete the key from the the key trie along with the key_leaf_content.
+        free(key_leaf_content);
+        // LOG_DEBUG("Deleted key %s from the key trie\n", key);
+        art_delete(key_trie, (unsigned char *)key, len);
+        return SUCCEED;
+    }
+
+    return ret;
+}
+
+/**
+ * Remove one index record from the local key trie of this server.
+ *
+ * The key trie is chosen by idx_record->is_key_suffix (suffix tree when it is 1, prefix tree
+ * otherwise) and the record is handed to delete_from_key_trie(). The elapsed time and the
+ * request/record counters of `idioms` are updated on every call.
+ *
+ * \param idioms     [IN] local index instance; must not be NULL
+ * \param idx_record [IN] record to delete; idx_record->key must not be NULL
+ *
+ * \return the status reported by delete_from_key_trie(), or FAIL on a NULL argument
+ *
+ * @validated
+ */
+perr_t
+idioms_local_index_delete(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record)
+{
+    perr_t ret = SUCCEED;
+    if (idioms == NULL || idx_record == NULL || idx_record->key == NULL) {
+        return FAIL;
+    }
+    // look up by the record's own key string; a delete does not need a private copy of it.
+    char *key = idx_record->key;
+    int   len = strlen(key);
+
+    stopwatch_t index_timer;
+    timer_start(&index_timer);
+    art_tree *key_trie =
+        (idx_record->is_key_suffix == 1) ? idioms->art_key_suffix_tree_g : idioms->art_key_prefix_tree_g;
+    // keep the deletion status so that a failure is reported to the caller.
+    ret = delete_from_key_trie(key_trie, key, len, idx_record);
+    /**
+     * Note: in IDIOMS, the client-runtime is responsible for iterating all suffixes of the key.
+     * Therefore, there is no need to insert the suffixes of the key into the key trie locally.
+     * Different suffixes of the key should be inserted into the key trie on different servers,
+     * distributed by DART.
+     *
+     * Therefore, the following logic is commented off.
+     */
+    // #ifndef PDC_DART_SFX_TREE
+    //     if (ret == FAIL) {
+    //         return ret;
+    //     }
+    //     ret = delete_from_key_trie(idioms_g->art_key_suffix_tree, reverse_str(key), len, idx_record);
+    // #else
+    //     // insert every suffix of the key into the trie;
+    //     int sub_loop_count = len;
+    //     for (int j = 1; j < sub_loop_count; j++) {
+    //         if (ret == FAIL) {
+    //             return ret;
+    //         }
+    //         char *suffix = substring(key, j, len);
+    //         ret = delete_from_key_trie(idioms_g->art_key_prefix_tree_g, suffix, strlen(suffix),
+    //         idx_record);
+    //     }
+    // #endif
+    timer_pause(&index_timer);
+    if (DART_SERVER_DEBUG) {
+        // only a string value may be printed with %s; a numeric value is raw bytes.
+        const char *value_str =
+            (idx_record->type == PDC_STRING) ? (const char *)idx_record->value : "[non-string value]";
+        printf("[Server_Side_Delete_%d] Timer to delete a keyword %s : %s from index = %.4f microseconds\n",
+               idioms->server_id_g, key, value_str, timer_delta_us(&index_timer));
+    }
+    idioms->time_to_delete_index_g += timer_delta_us(&index_timer);
+    idioms->index_record_count_g--;
+    idioms->delete_request_count_g++;
+    return ret;
+}
+
+/****************************/
+/* Index Search */
+/****************************/
+
+int
+collect_obj_ids(value_index_leaf_content_t *value_index_leaf, IDIOMS_md_idx_record_t *idx_record)
+{
+    Set *obj_id_set = (Set *)value_index_leaf->obj_id_set;
+
+    // get number of object IDs in the set
+    int num_obj_ids = set_num_entries(obj_id_set);
+    // printf("[SEARCH] obj_id_set: %p, num_obj: %d\n", obj_id_set, num_obj_ids);
+
+    // realloc the obj_ids array in idx_record
+    idx_record->obj_ids =
+        (uint64_t *)realloc(idx_record->obj_ids, sizeof(uint64_t) * (idx_record->num_obj_ids + num_obj_ids));
+    size_t offset = idx_record->num_obj_ids;
+    SetIterator value_set_iter;
+    set_iterate(obj_id_set, &value_set_iter);
+    while (set_iter_has_more(&value_set_iter)) {
+        uint64_t *item = (uint64_t *)set_iter_next(&value_set_iter);
+        memcpy(idx_record->obj_ids + offset, item,
sizeof(uint64_t)); + offset++; + } + if (offset - idx_record->num_obj_ids == num_obj_ids) { + idx_record->num_obj_ids += num_obj_ids; + } + else { + printf("ERROR: offset %zu != num_obj_ids %d\n", offset, num_obj_ids); + } + return 0; +} + +int +value_trie_callback(void *data, const unsigned char *key, uint32_t key_len, void *value) +{ + value_index_leaf_content_t *value_index_leaf = (value_index_leaf_content_t *)(value); + IDIOMS_md_idx_record_t * idx_record = (IDIOMS_md_idx_record_t *)(data); + + // printf("value_trie_callback: key: %s, value: %s, value_index_leaf: %p\n", key, (char + // *)idx_record->value, + // value_index_leaf); + char * v_query = (char *)idx_record->value; + pattern_type_t value_query_type = determine_pattern_type(v_query); + if (value_query_type == PATTERN_MIDDLE) { + char *infix = substring(v_query, 1, strlen(v_query) - 1); + if (contains((char *)key, infix) == 0) { + return 0; + } + } + + if (value_index_leaf != NULL) { + collect_obj_ids(value_index_leaf, idx_record); + } + return 0; +} + +rbt_walk_return_code_t +value_rbt_callback(rbt_t *rbt, void *key, size_t klen, void *value, void *priv) +{ + value_index_leaf_content_t *value_index_leaf = (value_index_leaf_content_t *)(value); + IDIOMS_md_idx_record_t * idx_record = (IDIOMS_md_idx_record_t *)(priv); + + // printf("value_rbt_callback: key: %s, value: %s, value_index_leaf: %p\n", (char *)key, + // (char *)idx_record->value, value_index_leaf); + + if (value_index_leaf != NULL) { + collect_obj_ids(value_index_leaf, idx_record); + } + return RBT_WALK_CONTINUE; +} + +/** + * The following queries are what we need to support + * 1. exact query -> key=|value| (key == value) + * 5. range query -> key=value~ (key > value) + * 6. range query -> key=~value (key < value) + * 7. range query -> key=value|~ (key >= value) + * 8. range query -> key=~|value (key <= value) + * 9. range query -> key=value1|~value2 (value1 <= key < value2) + * 10. 
range query -> key=value1~|value2 (value1 < key <= value2)
+ * 11. range query -> key=value1~value2 (value1 < key < value2)
+ * 12. range query -> key=value1|~|value2 (value1 <= key <= value2)
+ *
+ * Matching object IDs are appended to idx_record, either directly through collect_obj_ids()
+ * or through value_rbt_callback() during a range walk.
+ *
+ * When return 0, it is successful. -1 is returned when an in-between range query does not
+ * split into exactly two tokens around '~'.
+ */
+int
+value_number_query(char *secondary_query, key_index_leaf_content_t *leafcnt,
+                   IDIOMS_md_idx_record_t *idx_record)
+{
+    if (leafcnt->primary_rbt == NULL) {
+        return 0;
+    }
+
+    // val1 and val2 are filled in by get_number_from_string() according to the val_idx_dtype.
+    void * val1      = NULL;
+    void * val2      = NULL;
+    size_t query_len = strlen(secondary_query);
+    // an exact query is wrapped as |value|: it must START and END with '|'. The length check keeps
+    // a lone "|" from producing an inverted substring range.
+    if (query_len >= 2 && startsWith(secondary_query, "|") && endsWith(secondary_query, "|")) {
+        // exact number search
+        char * num_str = substring(secondary_query, 1, query_len - 1);
+        size_t klen1 =
+            get_number_from_string(num_str, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val1);
+        value_index_leaf_content_t *value_index_leaf = NULL;
+        rbt_find(leafcnt->primary_rbt, val1, klen1, (void **)&value_index_leaf);
+        if (value_index_leaf != NULL) {
+            collect_obj_ids(value_index_leaf, idx_record);
+        }
+    }
+    else if (startsWith(secondary_query, "~")) {
+        int endInclusive = secondary_query[1] == '|';
+        // find all numbers that are smaller than the given number
+        int beginPos = endInclusive ? 2 : 1;
+        char * numstr = substring(secondary_query, beginPos, query_len);
+        size_t klen1 =
+            get_number_from_string(numstr, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val1);
+        rbt_range_lt(leafcnt->primary_rbt, val1, klen1, value_rbt_callback, idx_record, endInclusive);
+    }
+    else if (endsWith(secondary_query, "~")) {
+        // the query does not start with '~' here, so it holds at least two characters.
+        int beginInclusive = secondary_query[query_len - 2] == '|';
+        int endPos = beginInclusive ? query_len - 2 : query_len - 1;
+        // find all numbers that are greater than the given number
+        char * numstr = substring(secondary_query, 0, endPos);
+        size_t klen1 =
+            get_number_from_string(numstr, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val1);
+        rbt_range_gt(leafcnt->primary_rbt, val1, klen1, value_rbt_callback, idx_record, beginInclusive);
+    }
+    else if (contains(secondary_query, "~")) {
+        int num_tokens = 0;
+        char **tokens = NULL;
+        // the string is not ended or started with '~', and if it contains '~', it is a in-between query.
+        split_string(secondary_query, "~", &tokens, &num_tokens);
+        if (num_tokens != 2) {
+            printf("ERROR: invalid range query: %s\n", secondary_query);
+            return -1;
+        }
+        char *lo_tok = tokens[0];
+        char *hi_tok = tokens[1];
+        // lo_tok might be ended with '|', and hi_tok might be started with '|', to indicate inclusivity.
+        int beginInclusive = endsWith(lo_tok, "|");
+        int endInclusive = startsWith(hi_tok, "|");
+        char * lo_num_str = beginInclusive ? substring(lo_tok, 0, strlen(lo_tok) - 1) : lo_tok;
+        char * hi_num_str = endInclusive ? substring(hi_tok, 1, strlen(hi_tok)) : hi_tok;
+        size_t klen1 =
+            get_number_from_string(lo_num_str, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val1);
+        size_t klen2 =
+            get_number_from_string(hi_num_str, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val2);
+
+        // the value returned by the walk is not needed here.
+        (void)rbt_range_walk(leafcnt->primary_rbt, val1, klen1, val2, klen2, value_rbt_callback,
+                             idx_record, beginInclusive, endInclusive);
+    }
+    else {
+        // exact query by default
+        // exact number search
+        char * num_str = strdup(secondary_query);
+        size_t klen1 =
+            get_number_from_string(num_str, _getNumericalTypeFromBitmap(leafcnt->val_idx_dtype), &val1);
+        value_index_leaf_content_t *value_index_leaf = NULL;
+        rbt_find(leafcnt->primary_rbt, val1, klen1, (void **)&value_index_leaf);
+        if (value_index_leaf != NULL) {
+            collect_obj_ids(value_index_leaf, idx_record);
+        }
+        // free(num_str);
+    }
+    return 0;
+}
+
+int
+value_string_query(char *secondary_query, key_index_leaf_content_t *leafcnt,
+                   IDIOMS_md_idx_record_t *idx_record)
+{
+    pattern_type_t level_two_ptn_type = determine_pattern_type(secondary_query);
+    char * tok = NULL;
+    switch (level_two_ptn_type) {
+        case PATTERN_EXACT:
+            tok = secondary_query;
+            if (leafcnt->primary_trie != NULL) {
+                value_index_leaf_content_t *value_index_leaf = (value_index_leaf_content_t *)art_search(
+                    leafcnt->primary_trie, (unsigned char *)tok, strlen(tok));
+                if (value_index_leaf != NULL) {
+                    value_trie_callback((void *)idx_record, (unsigned char *)tok, strlen(tok),
+                                        (void *)value_index_leaf);
+                }
+            }
+            break;
+        case PATTERN_PREFIX:
+            tok = subrstr(secondary_query, strlen(secondary_query) - 1);
+            if (leafcnt->primary_trie != NULL) {
+                art_iter_prefix((art_tree *)leafcnt->primary_trie, (unsigned char *)tok, strlen(tok),
+                                value_trie_callback, idx_record);
+            }
+            break;
+        case PATTERN_SUFFIX:
+            tok = substr(secondary_query, 1);
+#ifndef PDC_DART_SFX_TREE
+            tok =
reverse_str(tok); + // LOG_DEBUG("reverted_val_tok: %s\n", tok); + if (leafcnt->secondary_trie != NULL) { + art_iter_prefix(leafcnt->secondary_trie, (unsigned char *)tok, strlen(tok), + value_trie_callback, idx_record); + } +#else + if (leafcnt->secondary_trie != NULL) { + value_index_leaf_content_t *value_index_leaf = (value_index_leaf_content_t *)art_search( + leafcnt->secondary_trie, (unsigned char *)tok, strlen(tok)); + if (value_index_leaf != NULL) { + value_trie_callback((void *)idx_record, (unsigned char *)tok, strlen(tok), + (void *)value_index_leaf); + } + } +#endif + break; + case PATTERN_MIDDLE: + tok = substring(secondary_query, 1, strlen(secondary_query) - 1); + if (leafcnt->primary_trie != NULL) { +#ifndef PDC_DART_SFX_TREE + art_iter(leafcnt->primary_trie, value_trie_callback, idx_record); +#else + art_iter_prefix(leafcnt->secondary_trie, (unsigned char *)tok, strlen(tok), + value_trie_callback, idx_record); +#endif + } + break; + default: + break; + } + return 0; +} + +int +key_index_search_callback(void *data, const unsigned char *key, uint32_t key_len, void *value) +{ + key_index_leaf_content_t *leafcnt = (key_index_leaf_content_t *)value; + IDIOMS_md_idx_record_t * idx_record = (IDIOMS_md_idx_record_t *)(data); + + char *k_query = idx_record->key; + char *v_query = idx_record->value; + + pattern_type_t key_query_type = determine_pattern_type(k_query); + if (key_query_type == PATTERN_MIDDLE) { + char *infix = substring(k_query, 1, strlen(k_query) - 1); + if (contains((char *)key, infix) == 0) { + return 0; + } + } + + pdc_c_var_type_t attr_type = is_string_query(v_query) ? PDC_STRING : idx_record->type; + + int query_rst = 0; + if (is_string_query(v_query)) { // this is a test based on only the v_query string. + // perform string search + char *bare_v_query = stripQuotes(v_query); + // the value_string_query function should check if the leafcnt matches with the query type or not. 
+        query_rst |= value_string_query(bare_v_query, leafcnt, idx_record);
+    }
+
+    if (is_number_query(v_query)) {
+        // perform number search
+        query_rst |= value_number_query(v_query, leafcnt, idx_record);
+    }
+    return query_rst;
+}
+
+/**
+ * Run one query against the local key tries and collect the matching object IDs in idx_record.
+ *
+ * the query initially is passed via 'key' field.
+ * the format can be:
+ * 1. exact query -> key="value"
+ * 2. prefix query -> key="value*"
+ * 3. suffix query -> key="*value"
+ * 4. infix query -> key="*value*"
+ *
+ * TODO: The following queries are what we need to support
+ * 1. exact query -> key=|value| (key == value)
+ * 5. range query -> key=value~ (key > value)
+ * 6. range query -> key=~value (key < value)
+ * 7. range query -> key=value|~ (key >= value)
+ * 8. range query -> key=~|value (key <= value)
+ * 9. range query -> key=value1|~value2 (value1 <= key < value2)
+ * 10. range query -> key=value1~|value2 (value1 < key <= value2)
+ * 11. range query -> key=value1~value2 (value1 < key < value2)
+ * 12. range query -> key=value1|~|value2 (value1 <= key <= value2)
+ *
+ * On entry idx_record->key holds the whole query string; it is split at KV_DELIM and
+ * idx_record->key / idx_record->value are overwritten with the key part and the value part.
+ * Results are delivered through idx_record->obj_ids and idx_record->num_obj_ids.
+ *
+ * NOTE(review): result_count is never updated below, so this function always returns 0;
+ * confirm with the callers whether idx_record->num_obj_ids should be returned instead.
+ */
+uint64_t
+idioms_local_index_search(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record)
+{
+    uint64_t result_count = 0;
+    stopwatch_t index_timer;
+    if (idioms == NULL) {
+        // idioms cannot be dereferenced on this path, so -1 stands in for the unknown server id.
+        println("[Server_Side_Query_%d] idioms is NULL.", -1);
+        return result_count;
+    }
+    // the query string is read from idx_record->key right below, so it must be present as well.
+    if (idx_record == NULL || idx_record->key == NULL) {
+        return result_count;
+    }
+
+    char *query = idx_record->key;
+    char *kdelim_ptr = strchr(query, (int)KV_DELIM);
+
+    if (NULL == kdelim_ptr) {
+        if (DART_SERVER_DEBUG) {
+            println("[Server_Side_Query_%d]query string '%s' is not valid.", idioms->server_id_g, query);
+        }
+        return result_count;
+    }
+
+    char *k_query = get_key(query, KV_DELIM);
+    char *v_query = get_value(query, KV_DELIM);
+
+    if (DART_SERVER_DEBUG) {
+        println("[Server_Side_Query_%d] k_query = '%s' | v_query = '%s' ", idioms->server_id_g, k_query,
+                v_query);
+    }
+
+    idx_record->key = k_query;
+    idx_record->value = v_query;
+
+    timer_start(&index_timer);
+
+    char * qType_string = "Exact";
+    char * tok;
+    pattern_type_t level_one_ptn_type = determine_pattern_type(k_query);
+    key_index_leaf_content_t *leafcnt = NULL;
+    // if (index_type == DHT_FULL_HASH || index_type == DHT_INITIAL_HASH) {
+    //     search_through_hash_table(k_query, index_type, level_one_ptn_type, param);
+    // }
+    // else {
+    switch (level_one_ptn_type) {
+        case PATTERN_EXACT:
+            qType_string = "Exact";
+            tok = k_query;
+            leafcnt = (key_index_leaf_content_t *)art_search(idioms->art_key_prefix_tree_g,
+                                                             (unsigned char *)tok, strlen(tok));
+            if (leafcnt != NULL) {
+                key_index_search_callback((void *)idx_record, (unsigned char *)tok, strlen(tok),
+                                          (void *)leafcnt);
+            }
+            break;
+        case PATTERN_PREFIX:
+            qType_string = "Prefix";
+            tok = substring(k_query, 0, strlen(k_query) - 1);
+            art_iter_prefix(idioms->art_key_prefix_tree_g, (unsigned char *)tok, strlen(tok),
+                            key_index_search_callback, (void *)idx_record);
+            break;
+        case PATTERN_SUFFIX:
+            qType_string = "Suffix";
+            tok = substring(k_query, 1, strlen(k_query));
+#ifndef PDC_DART_SFX_TREE
+            tok = reverse_str(tok);
+            // LOG_DEBUG("reversed tok: %s\n", tok);
+            art_iter_prefix(idioms->art_key_suffix_tree_g, (unsigned char *)tok, strlen(tok),
+                            key_index_search_callback, (void *)idx_record);
+#else
+            leafcnt = (key_index_leaf_content_t *)art_search(idioms->art_key_suffix_tree_g,
+                                                             (unsigned char *)tok, strlen(tok));
+            if (leafcnt != NULL) {
+                key_index_search_callback((void *)idx_record, (unsigned char *)tok, strlen(tok),
+                                          (void *)leafcnt);
+            }
+#endif
+            break;
+        case PATTERN_MIDDLE:
+            qType_string = "Infix";
+            tok = substring(k_query, 1, strlen(k_query) - 1);
+#ifndef PDC_DART_SFX_TREE
+            art_iter(idioms->art_key_suffix_tree_g, key_index_search_callback, (void *)idx_record);
+#else
+            art_iter_prefix(idioms->art_key_suffix_tree_g, (unsigned char *)tok, strlen(tok),
+                            key_index_search_callback, (void *)idx_record);
+#endif
+            break;
+        default:
+            break;
+    }
+    // }
+
+    timer_pause(&index_timer);
+    if (DART_SERVER_DEBUG) {
+        // the count is converted explicitly so that the conversion specifier always matches it.
+        printf("[Server_Side_%s_%d] Time to address query '%s' and get %zu results = %.4f microseconds\n",
+               qType_string, idioms->server_id_g, query, (size_t)idx_record->num_obj_ids,
+               timer_delta_us(&index_timer));
+    }
+    idioms->time_to_search_index_g += timer_delta_us(&index_timer);
+    idioms->search_request_count_g += 1;
+    return result_count;
+}
diff --git a/src/commons/index/dart/index/idioms/idioms_local_index_test.c b/src/commons/index/dart/index/idioms/idioms_local_index_test.c
new file mode 100644
index 000000000..cd42c5b7e
--- /dev/null
+++ b/src/commons/index/dart/index/idioms/idioms_local_index_test.c
@@ -0,0 +1,499 @@
+#include "idioms_local_index.h"
+#include "dart_core.h"
+#include "bulki.h"
+#include "assert.h"
+
+typedef enum { IDIOMS_INSERT = 1, IDIOMS_DELETE = 2, IDIOMS_QUERY = 3 } IDIOMS_OP_TYPE;
+
+typedef struct {
+    IDIOMS_t *idioms;
+    void * buffer_in;
+    size_t buffer_in_size;
+    void * buffer_out;
+    size_t buffer_out_size;
+    int id;
+} dummy_server_t;
+
+typedef struct {
+    DART * dart;
+    int DART_ALPHABET_SIZE;
+    int num_servers;
+    int extra_tree_height;
+    int replication_factor;
+    int dart_insert_count;
+    void * buffer_in;
+    size_t buffer_in_size;
+    void * buffer_out;
+    size_t buffer_out_size;
+    int id;
+} dummy_client_t;
+
+dummy_server_t *servers;
+dummy_client_t *clients;
+
+void
+init_servers(int num_servers)
+{
+    // create an array of dummy_server_t
+    servers = (dummy_server_t *)malloc(num_servers * sizeof(dummy_server_t));
+    // initialize each server, simulating the initialization of every single process
+    for (int i = 0; i < num_servers; i++) {
+        servers[i].id = i;
+        servers[i].idioms = IDIOMS_init(i, num_servers);
+    }
+}
+
+void
+init_clients(int num_clients, int num_servers)
+{
+    // create an array of dummy_client_t
+    clients = (dummy_client_t *)malloc(num_clients * sizeof(dummy_client_t));
+    // initialize each client, simulating the initialization of every single process
+    for (int i = 0; i < num_clients; i++) {
+        clients[i].id = i;
+        clients[i].dart = (DART *)calloc(1,
sizeof(DART)); + clients[i].num_servers = num_servers; + clients[i].DART_ALPHABET_SIZE = 27; + clients[i].extra_tree_height = 0; + clients[i].replication_factor = clients[i].replication_factor > 0 ? clients[i].replication_factor : 2; + clients[i].replication_factor = 1; // pdc_server_num_g / 10; + } + + // simulate the initialization of the DART space + for (int i = 0; i < num_clients; i++) { + dart_space_init(clients[i].dart, clients[i].num_servers); + } +} + +/** + * message format: + * { + * "message_type": "IDIOMS_query", // can be "IDIOMS_insert", "IDIOMS_delete", "IDIOMS_query" + * "key": "key1", // can be query string if IDIOMS_query, and can be key if IDIOMS_insert or IDIOMS_delete. + * "value": "value1", // optional field + * "id": "id1", // optional field + * } + */ +void +client_generate_request(dummy_client_t *client, IDIOMS_OP_TYPE op_type, char *key, BULKI_Entity *value_ent, + uint64_t *id) +{ + + BULKI_Entity *bentArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(bentArr, + BULKI_ENTITY(&op_type, 1, PDC_INT8, PDC_CLS_ITEM)); // 0. op_type + BULKI_ENTITY_append_BULKI_Entity(bentArr, BULKI_ENTITY(key, 1, PDC_STRING, PDC_CLS_ITEM)); // 1. key + + if (op_type == IDIOMS_INSERT || op_type == IDIOMS_DELETE) { + BULKI_ENTITY_append_BULKI_Entity(bentArr, value_ent); // 2. value + BULKI_ENTITY_append_BULKI_Entity(bentArr, BULKI_ENTITY(id, 1, PDC_UINT64, PDC_CLS_ITEM)); // 3. 
id + } + client->buffer_out_size = get_BULKI_Entity_size(bentArr); + client->buffer_out = BULKI_Entity_serialize(bentArr); +} + +void +get_server_info_cb(dart_server *server_ptr) +{ + uint32_t server_id = server_ptr->id; + servers[server_id].idioms->index_record_count_g = 100; +} + +int +client_select_server(dummy_client_t *client, char *attr_key, IDIOMS_OP_TYPE op_type, + index_hash_result_t **hash_result) +{ + // select a server based on the key + dart_op_type_t dart_op = OP_INSERT; + char * input_key = attr_key; + get_server_info_callback gsi_cp = NULL; + if (op_type == IDIOMS_INSERT) { + dart_op = OP_INSERT; + gsi_cp = get_server_info_cb; + } + else if (op_type == IDIOMS_QUERY) { + dart_determine_query_token_by_key_query(attr_key, &input_key, &dart_op); + } + else if (op_type == IDIOMS_DELETE) { + dart_op = OP_DELETE; + } + return DART_hash(client->dart, attr_key, dart_op, gsi_cp, hash_result); +} + +void +sending_request_to_server(dummy_client_t *client, dummy_server_t *server) +{ + // memcpy to simulate the sending of the request to the server + server->buffer_in_size = client->buffer_out_size; + server->buffer_in = (void *)malloc(server->buffer_in_size); + memcpy(server->buffer_in, client->buffer_out, server->buffer_in_size); + free(client->buffer_out); +} + +void +get_response_from_server(dummy_client_t *client, dummy_server_t *server) +{ + // memcpy to simulate the receiving of the response from the server + client->buffer_in_size = server->buffer_out_size; + client->buffer_in = (void *)malloc(client->buffer_in_size); + memcpy(client->buffer_in, server->buffer_out, client->buffer_in_size); + free(server->buffer_out); +} + +perr_t +server_perform_query(dummy_server_t *server, char *query_str, uint64_t **object_id_list, uint64_t *count) +{ + IDIOMS_md_idx_record_t *idx_record = (IDIOMS_md_idx_record_t *)calloc(1, sizeof(IDIOMS_md_idx_record_t)); + idx_record->key = query_str; + idioms_local_index_search(server->idioms, idx_record); + *object_id_list = 
idx_record->obj_ids; + *count = idx_record->num_obj_ids; + return SUCCEED; +} + +perr_t +server_perform_insert(dummy_server_t *server, char *key, BULKI_Entity *value_ent, uint64_t id) +{ + // we assume that the count of value_ent is 1. + IDIOMS_md_idx_record_t *idx_record = (IDIOMS_md_idx_record_t *)calloc(1, sizeof(IDIOMS_md_idx_record_t)); + idx_record->key = key; + idx_record->value = value_ent->data; + idx_record->value_len = value_ent->size; + idx_record->type = value_ent->pdc_type; + idx_record->num_obj_ids = 1; + idx_record->obj_ids = (uint64_t *)calloc(1, sizeof(uint64_t)); + idx_record->obj_ids[0] = id; + idioms_local_index_create(server->idioms, idx_record); + return SUCCEED; +} + +perr_t +server_perform_delete(dummy_server_t *server, char *key, BULKI_Entity *value_ent, uint64_t id) +{ + IDIOMS_md_idx_record_t *idx_record = (IDIOMS_md_idx_record_t *)calloc(1, sizeof(IDIOMS_md_idx_record_t)); + idx_record->key = key; + idx_record->value = value_ent->data; + idx_record->value_len = value_ent->size; + idx_record->type = value_ent->pdc_type; + idx_record->num_obj_ids = 1; + idx_record->obj_ids = (uint64_t *)calloc(1, sizeof(uint64_t)); + idx_record->obj_ids[0] = id; + idioms_local_index_delete(server->idioms, idx_record); + return SUCCEED; +} + +void +server_perform_operation(dummy_server_t *server) +{ + // printf("Perform operation on server %d\n", server->id); + BULKI_Entity * resultBent = empty_Bent_Array_Entity(); + BULKI_Entity * bentArr = BULKI_Entity_deserialize(server->buffer_in); + BULKI_Entity * opType_ent = BULKI_ENTITY_get_BULKI_Entity(bentArr, 0); + BULKI_Entity * key_ent = BULKI_ENTITY_get_BULKI_Entity(bentArr, 1); + char * key = (char *)key_ent->data; + perr_t result = SUCCEED; + IDIOMS_OP_TYPE opType = *(int8_t *)opType_ent->data; + uint64_t * obj_id_list = NULL; + uint64_t count = 0; + if (opType == IDIOMS_QUERY) { + result = server_perform_query(server, key, &obj_id_list, &count); + } + else { + BULKI_Entity *value_ent = 
BULKI_ENTITY_get_BULKI_Entity(bentArr, 2); + BULKI_Entity *id_ent = BULKI_ENTITY_get_BULKI_Entity(bentArr, 3); + uint64_t id = *(uint64_t *)id_ent->data; + if (opType == IDIOMS_INSERT) { + result = server_perform_insert(server, key, value_ent, id); + } + else if (opType == IDIOMS_DELETE) { + result = server_perform_delete(server, key, value_ent, id); + } + } + BULKI_ENTITY_append_BULKI_Entity(resultBent, BULKI_ENTITY(&result, 1, PDC_INT32, PDC_CLS_ITEM)); + if (count > 0) { + BULKI_ENTITY_append_BULKI_Entity(resultBent, + BULKI_ENTITY(obj_id_list, count, PDC_UINT64, PDC_CLS_ARRAY)); + } + server->buffer_out_size = get_BULKI_Entity_size(resultBent); + server->buffer_out = BULKI_Entity_serialize(resultBent); + free(server->buffer_in); +} + +perr_t +client_parse_response(dummy_client_t *client, uint64_t **obj_id_list, uint64_t *count) +{ + BULKI_Entity *resultBent = BULKI_Entity_deserialize(client->buffer_in); + int result = *(int *)BULKI_ENTITY_get_BULKI_Entity(resultBent, 0)->data; + if (result == SUCCEED && obj_id_list != NULL && count != NULL) { + BULKI_Entity *obj_id_bent = BULKI_ENTITY_get_BULKI_Entity(resultBent, 1); + if (obj_id_bent != NULL && obj_id_bent->count > 0) { + *obj_id_list = (uint64_t *)obj_id_bent->data; + *count = obj_id_bent->count; + } + } + free(client->buffer_in); + return result; +} + +perr_t +client_insert_data(dummy_client_t *client, int id) +{ + char key[40]; + char value[40]; + sprintf(key, "%d_%d", id, id); + sprintf(value, "%d_%d_%d", id, id, id); + uint64_t u64_id = (uint64_t)id; + perr_t result = SUCCEED; + // generate a request for each client + index_hash_result_t *hash_result = NULL; + int num_selected_srvs = client_select_server(client, key, IDIOMS_INSERT, &hash_result); + for (int s = 0; s < num_selected_srvs; s++) { + // client insert string value + BULKI_Entity *value_ent = BULKI_ENTITY(value, 1, PDC_STRING, PDC_CLS_ITEM); + client_generate_request(client, IDIOMS_INSERT, key, value_ent, &u64_id); + 
sending_request_to_server(client, &servers[hash_result[s].server_id]); + server_perform_operation(&servers[hash_result[s].server_id]); + get_response_from_server(client, &servers[hash_result[s].server_id]); + result |= client_parse_response(client, NULL, NULL); + + // client insert numeric value + int32_t i32_id = (int32_t)id; + BULKI_Entity *value_ent2 = BULKI_ENTITY(&i32_id, 1, PDC_INT32, PDC_CLS_ITEM); + client_generate_request(client, IDIOMS_INSERT, key, value_ent2, &u64_id); + sending_request_to_server(client, &servers[hash_result[s].server_id]); + server_perform_operation(&servers[hash_result[s].server_id]); + get_response_from_server(client, &servers[hash_result[s].server_id]); + result |= client_parse_response(client, NULL, NULL); + } + char *result_str = result == SUCCEED ? "SUCCEED" : "FAILED"; + printf("Insert result: %s\n", result_str); + return result; +} + +void +client_print_result(uint64_t *rst_ids, uint64_t rst_count) +{ + printf("Result count : %" PRIu64 " | ", rst_count); + for (uint64_t i = 0; i < rst_count; i++) { // index has the same type as the count it is compared with + printf("%" PRIu64 " ", rst_ids[i]); // PRIu64 is the matching specifier for uint64_t; %lu is not portable + } + printf("|\n"); +} + +uint64_t +client_perform_search(dummy_client_t *client, char *query, uint64_t **rst_ids) +{ + if (rst_ids == NULL) { + return 0; + } + // generate a request for each client + index_hash_result_t *hash_result = NULL; + int num_selected_srvs = client_select_server(client, query, IDIOMS_QUERY, &hash_result); + *rst_ids = NULL; + uint64_t rst_count = 0; + for (int s = 0; s < num_selected_srvs; s++) { + client_generate_request(client, IDIOMS_QUERY, query, NULL, NULL); + sending_request_to_server(client, &servers[hash_result[s].server_id]); + server_perform_operation(&servers[hash_result[s].server_id]); + get_response_from_server(client, &servers[hash_result[s].server_id]); + uint64_t *obj_id_list = NULL; + uint64_t count = 0; + perr_t result = client_parse_response(client, &obj_id_list, &count); + if (result == SUCCEED && count > 0) { + rst_count += count; + if (*rst_ids == NULL) { +
*rst_ids = (uint64_t *)malloc(rst_count * sizeof(uint64_t)); + } + else { + *rst_ids = (uint64_t *)realloc(*rst_ids, rst_count * sizeof(uint64_t)); + } + memcpy(*rst_ids + rst_count - count, obj_id_list, count * sizeof(uint64_t)); + } + } + client_print_result(*rst_ids, rst_count); + return rst_count; +} + +perr_t +client_delete_data(dummy_client_t *client, int id) +{ + char key[40]; // same capacity as client_insert_data, which formats the identical key + char value[40]; // "%d_%d_%d" needs up to 36 bytes for a full-range int; 20 overflowed for 6-digit ids + sprintf(key, "%d_%d", id, id); + sprintf(value, "%d_%d_%d", id, id, id); + uint64_t u64_id = (uint64_t)id; + // generate a request for each client + index_hash_result_t *hash_result = NULL; + int num_selected_srvs = client_select_server(client, key, IDIOMS_DELETE, &hash_result); + perr_t result = SUCCEED; + for (int s = 0; s < num_selected_srvs; s++) { + // delete string value + BULKI_Entity *value_ent = BULKI_ENTITY(value, 1, PDC_STRING, PDC_CLS_ITEM); + client_generate_request(client, IDIOMS_DELETE, key, value_ent, &u64_id); + sending_request_to_server(client, &servers[hash_result[s].server_id]); + server_perform_operation(&servers[hash_result[s].server_id]); + get_response_from_server(client, &servers[hash_result[s].server_id]); + result |= client_parse_response(client, NULL, NULL); + + // delete numeric value + int32_t i32_id = (int32_t)id; + BULKI_Entity *value_ent2 = BULKI_ENTITY(&i32_id, 1, PDC_INT32, PDC_CLS_ITEM); + client_generate_request(client, IDIOMS_DELETE, key, value_ent2, &u64_id); + sending_request_to_server(client, &servers[hash_result[s].server_id]); + server_perform_operation(&servers[hash_result[s].server_id]); + get_response_from_server(client, &servers[hash_result[s].server_id]); + result |= client_parse_response(client, NULL, NULL); + } + char *result_str = result == SUCCEED ?
"SUCCEED" : "FAILED"; + printf("Delete result: %s\n", result_str); + return result; +} + +void +basic_test() +{ + perr_t insert_rst = client_insert_data(&clients[0], 10); + assert(insert_rst == SUCCEED); + + char query[100]; + // exact query + sprintf(query, "%d_%d=\"%d_%d_%d\"", 10, 10, 10, 10, 10); + uint64_t *rst_ids = NULL; + uint64_t rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 1); + assert(rst_ids[0] == 10); + + // prefix search + sprintf(query, "%d_%d=\"%d_*\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 1); + assert(rst_ids[0] == 10); + + // suffix search + sprintf(query, "%d_%d=\"*_%d\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 1); + assert(rst_ids[0] == 10); + + // infix search + sprintf(query, "%d_%d=\"*_%d_*\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 1); + assert(rst_ids[0] == 10); + + // numeric exact query + sprintf(query, "%d_%d=|%d|", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count >= 1); + assert(rst_ids[0] == 10); + + // numeric range query + sprintf(query, "%d_%d=%d|~|%d", 10, 10, 9, 11); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 1); + assert(rst_ids[0] == 10); + + perr_t delete_rst = client_delete_data(&clients[0], 10); + assert(delete_rst == SUCCEED); + + sprintf(query, "%d_%d=\"%d_%d_%d\"", 10, 10, 10, 10, 10); // exact query again after the delete; one argument per conversion + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); + + // prefix search + sprintf(query, "%d_%d=\"%d_*\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); +
+ // suffix search + sprintf(query, "%d_%d=\"*_%d\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); + + // infix search + sprintf(query, "%d_%d=\"*_%d_*\"", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); + + // numeric exact query + sprintf(query, "%d_%d=|%d|", 10, 10, 10); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); + + // numeric range query + sprintf(query, "%d_%d=%d|~|%d", 10, 10, 9, 11); + rst_ids = NULL; + rst_count = client_perform_search(&clients[0], query, &rst_ids); + assert(rst_count == 0); + assert(rst_ids == NULL); +} + +int +main(int argc, char *argv[]) +{ + // read number of servers from first console argument + int num_servers = atoi(argv[1]); + // read number of clients from second console argument + int num_clients = atoi(argv[2]); + + // create an array of dummy_server_t + init_servers(num_servers); + + // create an array of dummy_client_t + init_clients(num_clients, num_servers); + + basic_test(); + + // // insert data + // for (int i = 0; i < num_clients; i++) { + // for (int id = 10000; id < 20000; id++) { + // if (id % num_clients != i) + // continue; + // client_insert_data(&clients[i], id); + // } + // } + + // // // search data + // for (int i = 0; i < num_clients; i++) { + // for (int id = 10000; id < 20000; id++) { + // if (id % num_clients != i) + // continue; + // client_perform_search(&clients[i], id); + // } + // } + + // // // delete data + // for (int i = 0; i < num_clients; i++) { + // for (int id = 10000; id < 20000; id++) { + // if (id % num_clients != i) + // continue; + // client_delete_data(&clients[i], id); + // } + // } + + // // // search data + // for (int i = 0; i < num_clients; i++) { + // for (int id = 10000; id < 20000; 
id++) { + // if (id % num_clients != i) + // continue; + // client_perform_search(&clients[i], id); + // } + // } + + return 0; +} \ No newline at end of file diff --git a/src/commons/index/dart/index/idioms/idioms_persistence.c b/src/commons/index/dart/index/idioms/idioms_persistence.c new file mode 100644 index 000000000..8b2b36ac6 --- /dev/null +++ b/src/commons/index/dart/index/idioms/idioms_persistence.c @@ -0,0 +1,506 @@ +#include "idioms_persistence.h" +#include "comparators.h" +#include "rbtree.h" +#include "pdc_set.h" +#include "pdc_hash.h" +#include "bin_file_ops.h" +#include "pdc_hash_table.h" +#include "dart_core.h" +#include "string_utils.h" +#include "query_utils.h" +#include "pdc_logger.h" +#include +#include +#include +#include "bulki_serde.h" + +/****************************/ +/* Index Dump */ +/****************************/ + +// ********************* Index Dump and Load ********************* + +/** + * This is a object ID set + * |number of object IDs = n|object ID 1|...|object ID n| + * + * validated. + */ +uint64_t +append_obj_id_set(Set *obj_id_set, BULKI_Entity *id_set_entity) +{ + uint64_t num_obj_id = set_num_entries(obj_id_set); + SetIterator iter; + set_iterate(obj_id_set, &iter); + while (set_iter_has_more(&iter)) { + uint64_t * item = (uint64_t *)set_iter_next(&iter); + BULKI_Entity *id_entity = BULKI_ENTITY(item, 1, PDC_UINT64, PDC_CLS_ITEM); + BULKI_ENTITY_append_BULKI_Entity(id_set_entity, id_entity); + } + return num_obj_id + 1; +} + +int +append_value_tree_node(void *v_id_bulki, void *key, uint32_t key_size, pdc_c_var_type_t key_type, void *value) +{ + BULKI *bulki = (BULKI *)v_id_bulki; + // entity for the key + BULKI_Entity *key_entity = + BULKI_ENTITY(key, key_type == PDC_STRING ? 
1 : key_size, key_type, PDC_CLS_ITEM); + + BULKI_Entity *id_set_entity = BULKI_get(bulki, key_entity); + + if (id_set_entity == NULL) { + id_set_entity = empty_Bent_Array_Entity(); + } + + value_index_leaf_content_t *value_index_leaf = (value_index_leaf_content_t *)(value); + if (value_index_leaf != NULL) { + Set *obj_id_set = (Set *)value_index_leaf->obj_id_set; + append_obj_id_set(obj_id_set, id_set_entity); + } + + BULKI_put(bulki, key_entity, id_set_entity); + + return 0; // return 0 for art iteration to continue; +} + +/** + * This is a string value node + * |str_val|file_obj_pair_list| + */ +int +append_string_value_node(void *v_id_bulki, const unsigned char *key, uint32_t key_len, void *value) +{ + return append_value_tree_node(v_id_bulki, (void *)key, key_len, PDC_STRING, value); +} + +rbt_walk_return_code_t +append_numeric_value_node(rbt_t *rbt, void *key, size_t klen, void *value, void *v_id_bulki) +{ + append_value_tree_node(v_id_bulki, key, klen, rbt_get_dtype(rbt), value); + return RBT_WALK_CONTINUE; +} + +/** + * This is the string value region + * |type = 3|number of values = n|value_node_1|...|value_node_n| + * + * return number of strings in the string value tree + */ +uint64_t +append_string_value_tree(art_tree *art, BULKI *v_id_bulki) +{ + uint64_t rst = art_iter(art, append_string_value_node, v_id_bulki); + return rst; +} + +/** + * This is the numeric value region + * |type = 0/1/2|number of values = n|value_node_1|...|value_node_n| + * + * return number of numeric values in the numeric value tree + */ +uint64_t +append_numeric_value_tree(rbt_t *rbt, BULKI *v_id_bulki) +{ + uint64_t rst = rbt_walk(rbt, append_numeric_value_node, v_id_bulki); + return rst; +} + +/** + * return number of attribute values + * This is an attribute node + * |attr_name|attr_value_region| + */ +int +append_attr_name_node(void *data, const unsigned char *key, uint32_t key_len, void *value) +{ + int rst = 0; + + key_index_leaf_content_t *leafcnt = 
(key_index_leaf_content_t *)value; + HashTable * vnode_buf_ht = (HashTable *)data; // data is the parameter passed in + // the hash table is used to store the buffer struct related to each vnode id. + BULKI *kv_bulki = hash_table_lookup(vnode_buf_ht, &(leafcnt->virtural_node_id)); + // index_buffer_t *buffer = hash_table_lookup(vnode_buf_ht, &(leafcnt->virtural_node_id)); + if (kv_bulki == NULL) { + kv_bulki = BULKI_init(1); // one key-value pair + hash_table_insert(vnode_buf_ht, &(leafcnt->virtural_node_id), kv_bulki); + } + // printf("[PERSISTENCE]key = %s\n", key); + BULKI_Entity *key_entity = BULKI_ENTITY((void *)key, 1, PDC_STRING, PDC_CLS_ITEM); + + BULKI_Entity *data_entity = BULKI_get(kv_bulki, key_entity); + if (data_entity == NULL) { + // initilize data entity + data_entity = BULKI_ENTITY(BULKI_init(4), 1, PDC_BULKI, PDC_CLS_ITEM); + } + + BULKI *tree_bulki = ((BULKI *)data_entity->data); + // append the value type + if (_getCompoundTypeFromBitmap(leafcnt->val_idx_dtype) == PDC_STRING) { + if (leafcnt->primary_trie != NULL) { + BULKI *v_id_bulki = BULKI_init(1); + rst |= append_string_value_tree(leafcnt->primary_trie, v_id_bulki); + BULKI_put(tree_bulki, BULKI_ENTITY("primary_trie", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY(v_id_bulki, 1, PDC_BULKI, PDC_CLS_ITEM)); + } + if (leafcnt->secondary_trie != NULL) { + BULKI *v_id_bulki = BULKI_init(1); + rst |= append_string_value_tree(leafcnt->secondary_trie, v_id_bulki); + BULKI_put(tree_bulki, BULKI_ENTITY("secondary_trie", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY(v_id_bulki, 1, PDC_BULKI, PDC_CLS_ITEM)); + } + } + + if (_getNumericalTypeFromBitmap(leafcnt->val_idx_dtype) != PDC_UNKNOWN) { + if (leafcnt->primary_rbt != NULL) { + BULKI *v_id_bulki = BULKI_init(1); + rst |= append_numeric_value_tree(leafcnt->primary_rbt, v_id_bulki); + BULKI_put(tree_bulki, BULKI_ENTITY("primary_rbt", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY(v_id_bulki, 1, PDC_BULKI, PDC_CLS_ITEM)); + } + if (leafcnt->secondary_rbt 
!= NULL) { + BULKI *v_id_bulki = BULKI_init(1); + rst |= append_numeric_value_tree(leafcnt->secondary_rbt, v_id_bulki); + BULKI_put(tree_bulki, BULKI_ENTITY("secondary_rbt", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY(v_id_bulki, 1, PDC_BULKI, PDC_CLS_ITEM)); + } + } + // add the kv pair to the bulki structure + BULKI_put(kv_bulki, key_entity, data_entity); + return 0; // return 0 for art iteration to continue; +} + +/** + * Append the IDIOMS root to the file + * + */ +int +dump_attr_root_tree(art_tree *art, char *dir_path, char *base_name, uint32_t serverID) +{ + HashTable *vid_buf_hash = + hash_table_new(pdc_default_uint64_hash_func_ptr, pdc_default_uint64_equal_func_ptr); + int rst = art_iter(art, append_attr_name_node, vid_buf_hash); + + int n_entry = hash_table_num_entries(vid_buf_hash); + // iterate the hashtable and store the buffers into the file corresponds to the vnode id + HashTableIterator iter; + hash_table_iterate(vid_buf_hash, &iter); + while (n_entry != 0 && hash_table_iter_has_more(&iter)) { + HashTablePair pair = hash_table_iter_next(&iter); + // vnode ID. On different server, there can be the same vnode ID at this line below + uint64_t *vid = pair.key; + BULKI * bulki = pair.value; + char file_name[1024]; + // and this is why do we need to differentiate the file name by the server ID. 
+ sprintf(file_name, "%s/%s_%" PRIu32 "_%" PRIu64 ".bin", dir_path, base_name, serverID, *vid); + LOG_INFO("Writing index to file_name: %s\n", file_name); + FILE *stream = fopen(file_name, "wb"); + BULKI_serialize_to_file(bulki, stream); + } + return rst; +} + +void +dump_dart_info(DART *dart, char *dir_path, uint32_t serverID) +{ + if (serverID == 0) { + char file_name[1024]; + sprintf(file_name, "%s/%s.bin", dir_path, "dart_info"); + LOG_INFO("Writing DART info to file_name: %s\n", file_name); + FILE * stream = fopen(file_name, "wb"); + BULKI_Entity *dart_ent = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(dart_ent, + BULKI_ENTITY(&(dart->alphabet_size), 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(dart_ent, + BULKI_ENTITY(&(dart->dart_tree_height), 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(dart_ent, + BULKI_ENTITY(&(dart->replication_factor), 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity( + dart_ent, BULKI_ENTITY(&(dart->client_request_count), 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(dart_ent, + BULKI_ENTITY(&(dart->num_server), 1, PDC_UINT32, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(dart_ent, + BULKI_ENTITY(&(dart->num_vnode), 1, PDC_UINT64, PDC_CLS_ITEM)); + + BULKI_Entity_serialize_to_file(dart_ent, stream); + } +} + +perr_t +idioms_metadata_index_dump(IDIOMS_t *idioms, char *dir_path, uint32_t serverID) +{ + perr_t ret_value = SUCCEED; + + stopwatch_t timer; + timer_start(&timer); + + // dump DART info + dump_dart_info(idioms->dart_info_g, dir_path, serverID); + + dump_attr_root_tree(idioms->art_key_prefix_tree_g, dir_path, "idioms_prefix", serverID); + dump_attr_root_tree(idioms->art_key_suffix_tree_g, dir_path, "idioms_suffix", serverID); + + timer_pause(&timer); + println("[IDIOMS_Index_Dump_%d] Timer to dump index = %.4f microseconds\n", serverID, + timer_delta_us(&timer)); + return ret_value; +} + +// *********************** Index Loading 
*********************************** + +int +fill_set_from_BULKI_Entity(value_index_leaf_content_t *value_index_leaf, BULKI_Entity *data_entity, + int64_t *index_record_count) +{ + BULKI_Entity_Iterator *it = Bent_iterator_init(data_entity, NULL, PDC_UNKNOWN); + while (Bent_iterator_has_next_Bent(it)) { + BULKI_Entity *id_entity = Bent_iterator_next_Bent(it); + uint64_t * obj_id = calloc(1, sizeof(uint64_t)); + memcpy(obj_id, id_entity->data, sizeof(uint64_t)); + set_insert(value_index_leaf->obj_id_set, obj_id); + value_index_leaf->indexed_item_count++; + (*index_record_count)++; + } + return 0; +} + +int +read_attr_value_node(key_index_leaf_content_t *leaf_cnt, int value_tree_idx, BULKI *v_id_bulki, + int64_t *index_record_count) +{ + int rst = 0; + BULKI_KV_Pair_Iterator *it = BULKI_KV_Pair_iterator_init(v_id_bulki); + while (BULKI_KV_Pair_iterator_has_next(it)) { + BULKI_KV_Pair *kv_pair = BULKI_KV_Pair_iterator_next(it); + BULKI_Entity * key_entity = &(kv_pair->key); + BULKI_Entity * data_entity = &(kv_pair->value); + + value_index_leaf_content_t *value_index_leaf = + (value_index_leaf_content_t *)calloc(1, sizeof(value_index_leaf_content_t)); + value_index_leaf->obj_id_set = set_new(ui64_hash, ui64_equal); + set_register_free_function(value_index_leaf->obj_id_set, free); + + fill_set_from_BULKI_Entity(value_index_leaf, data_entity, index_record_count); + + if (value_tree_idx == 0 && key_entity->pdc_type == PDC_STRING) { + art_tree *value_index_art = (art_tree *)leaf_cnt->primary_trie; + art_insert(value_index_art, (const unsigned char *)key_entity->data, strlen(key_entity->data), + value_index_leaf); + leaf_cnt->indexed_item_count++; + } + if (value_tree_idx == 1 && key_entity->pdc_type == PDC_STRING) { + art_tree *value_index_art = (art_tree *)leaf_cnt->secondary_trie; + art_insert(value_index_art, (const unsigned char *)key_entity->data, strlen(key_entity->data), + value_index_leaf); + } + if (value_tree_idx == 2 && key_entity->pdc_type != PDC_STRING) { + 
rbt_t *value_index_rbt = (rbt_t *)leaf_cnt->primary_rbt; + rbt_add(value_index_rbt, key_entity->data, key_entity->size, value_index_leaf); + leaf_cnt->indexed_item_count++; + } + if (value_tree_idx == 3 && key_entity->pdc_type != PDC_STRING) { + rbt_t *value_index_rbt = (rbt_t *)leaf_cnt->secondary_rbt; + rbt_add(value_index_rbt, key_entity->data, key_entity->size, value_index_leaf); + } + } + return rst; +} + +int +read_value_tree(key_index_leaf_content_t *leaf_cnt, int value_tree_idx, BULKI *v_id_bulki, + int64_t *index_record_count) +{ + int rst = 0; + + switch (value_tree_idx) { + case 0: + leaf_cnt->primary_trie = (art_tree *)calloc(1, sizeof(art_tree)); + art_tree_init(leaf_cnt->primary_trie); + _encodeTypeToBitmap(&(leaf_cnt->val_idx_dtype), PDC_STRING); + rst = read_attr_value_node(leaf_cnt, 0, v_id_bulki, index_record_count); + break; + case 1: + leaf_cnt->secondary_trie = (art_tree *)calloc(1, sizeof(art_tree)); + art_tree_init(leaf_cnt->secondary_trie); + _encodeTypeToBitmap(&(leaf_cnt->val_idx_dtype), PDC_STRING); + rst = read_attr_value_node(leaf_cnt, 1, v_id_bulki, index_record_count); + break; + case 2: + if (v_id_bulki->numKeys > 0) { + leaf_cnt->primary_rbt = + (rbt_t *)rbt_create_by_dtype(v_id_bulki->header->keys[0].pdc_type, PDC_free_void); + _encodeTypeToBitmap(&(leaf_cnt->val_idx_dtype), rbt_get_dtype(leaf_cnt->primary_rbt)); + rst = read_attr_value_node(leaf_cnt, 2, v_id_bulki, index_record_count); + } + break; + case 3: + if (v_id_bulki->numKeys > 0) { + leaf_cnt->secondary_rbt = + (rbt_t *)rbt_create_by_dtype(v_id_bulki->header->keys[0].pdc_type, PDC_free_void); + _encodeTypeToBitmap(&(leaf_cnt->val_idx_dtype), rbt_get_dtype(leaf_cnt->secondary_rbt)); + rst = read_attr_value_node(leaf_cnt, 3, v_id_bulki, index_record_count); + } + break; + default: + break; + } + return rst; +} + +int +read_attr_name_node(IDIOMS_t *idioms, char *dir_path, char *base_name, uint32_t serverID, uint64_t vnode_id) +{ + int rst = 0; + char file_name[1024]; + 
sprintf(file_name, "%s/%s_%" PRIu32 "_%" PRIu64 ".bin", dir_path, base_name, serverID, vnode_id); + + // check file existence + if (access(file_name, F_OK) == -1) { + return FAIL; + } + FILE *stream = fopen(file_name, "rb"); + if (stream == NULL) { + return FAIL; + } + BULKI *bulki = BULKI_deserialize_from_file(stream); + + art_tree *art_key_index = NULL; + if (strcmp(base_name, "idioms_prefix") == 0) { + art_key_index = idioms->art_key_prefix_tree_g; + } + else if (strcmp(base_name, "idioms_suffix") == 0) { + art_key_index = idioms->art_key_suffix_tree_g; + } + else { + LOG_ERROR("Unknown base_name: %s\n", base_name); + return FAIL; + } + + LOG_INFO("Loaded Index from file_name: %s\n", file_name); + + // iterate the bulki structure and insert the key-value pair into the art tree + BULKI_KV_Pair_Iterator *it = BULKI_KV_Pair_iterator_init(bulki); + while (BULKI_KV_Pair_iterator_has_next(it)) { + BULKI_KV_Pair *kv_pair = BULKI_KV_Pair_iterator_next(it); + BULKI_Entity * key_entity = &(kv_pair->key); + BULKI_Entity * data_entity = &(kv_pair->value); + + key_index_leaf_content_t *leafcnt = + (key_index_leaf_content_t *)calloc(1, sizeof(key_index_leaf_content_t)); + art_insert(art_key_index, (const unsigned char *)key_entity->data, strlen(key_entity->data), leafcnt); + + BULKI *tree_bulki = (BULKI *)data_entity->data; + // restore primary trie + BULKI_Entity *primary_trie_ent = + BULKI_get(tree_bulki, BULKI_ENTITY("primary_trie", 1, PDC_STRING, PDC_CLS_ITEM)); + if (primary_trie_ent != NULL) { + BULKI *v_id_bulki = (BULKI *)primary_trie_ent->data; + read_value_tree(leafcnt, 0, v_id_bulki, &(idioms->index_record_count_g)); + } + // restore secondary trie + BULKI_Entity *secondary_trie_ent = + BULKI_get(tree_bulki, BULKI_ENTITY("secondary_trie", 1, PDC_STRING, PDC_CLS_ITEM)); + if (secondary_trie_ent != NULL) { + BULKI *v_id_bulki = (BULKI *)secondary_trie_ent->data; + read_value_tree(leafcnt, 1, v_id_bulki, &(idioms->index_record_count_g)); + } + // restore primary rbt + 
BULKI_Entity *primary_rbt_ent = + BULKI_get(tree_bulki, BULKI_ENTITY("primary_rbt", 1, PDC_STRING, PDC_CLS_ITEM)); + if (primary_rbt_ent != NULL) { + BULKI *v_id_bulki = (BULKI *)primary_rbt_ent->data; + read_value_tree(leafcnt, 2, v_id_bulki, &(idioms->index_record_count_g)); + } + // restore secondary rbt + BULKI_Entity *secondary_rbt_ent = + BULKI_get(tree_bulki, BULKI_ENTITY("secondary_rbt", 1, PDC_STRING, PDC_CLS_ITEM)); + if (secondary_rbt_ent != NULL) { + BULKI *v_id_bulki = (BULKI *)secondary_rbt_ent->data; + read_value_tree(leafcnt, 3, v_id_bulki, &(idioms->index_record_count_g)); + } + } + return rst; +} + +void +load_dart_info(DART *dart, char *dir_path, uint32_t serverID) +{ + if (serverID == 0) { + char file_name[1024]; + sprintf(file_name, "%s/%s.bin", dir_path, "dart_info"); + FILE *stream = fopen(file_name, "rb"); + if (stream == NULL) { + return; + } + BULKI_Entity * dart_ent = BULKI_Entity_deserialize_from_file(stream); + BULKI_Entity_Iterator *it = Bent_iterator_init(dart_ent, NULL, PDC_UNKNOWN); + int i = 0; + while (Bent_iterator_has_next_Bent(it)) { + BULKI_Entity *entry = Bent_iterator_next_Bent(it); + switch (i) { + case 0: + if (entry->pdc_type == PDC_INT) { + dart->alphabet_size = *(int *)entry->data; + } + break; + case 1: + if (entry->pdc_type == PDC_INT) { + dart->dart_tree_height = *(int *)entry->data; + } + break; + case 2: + if (entry->pdc_type == PDC_INT) { + dart->replication_factor = *(int *)entry->data; + } + break; + case 3: + if (entry->pdc_type == PDC_INT) { + dart->client_request_count = *(int *)entry->data; + } + break; + case 4: + + if (entry->pdc_type == PDC_UINT32) { + dart->num_server = *(uint32_t *)entry->data; + } + break; + + case 5: + + if (entry->pdc_type == PDC_UINT64) { + dart->num_vnode = *(uint64_t *)entry->data; + } + break; + } + i++; + } + } +} + +perr_t +idioms_metadata_index_recover(IDIOMS_t *idioms, char *dir_path, int num_server, uint32_t serverID) +{ + perr_t ret_value = SUCCEED; + + stopwatch_t timer; + 
timer_start(&timer); + + load_dart_info(idioms->dart_info_g, dir_path, serverID); + + uint64_t *vid_array = NULL; + size_t num_vids = get_vnode_ids_by_serverID(idioms->dart_info_g, serverID, &vid_array); + + // load the attribute region for each vnode + for (size_t vid = 0; vid < num_vids; vid++) { + for (size_t sid = 0; sid < num_server; sid++) { + read_attr_name_node(idioms, dir_path, "idioms_prefix", sid, vid_array[vid]); + read_attr_name_node(idioms, dir_path, "idioms_suffix", sid, vid_array[vid]); + } + } + timer_pause(&timer); + println("[IDIOMS_Index_Recover_%d] Timer to recover index = %.4f microseconds\n", serverID, + timer_delta_us(&timer)); + return ret_value; +} diff --git a/src/commons/index/dart/index/idioms/include/idioms_local_index.h b/src/commons/index/dart/index/idioms/include/idioms_local_index.h new file mode 100644 index 000000000..d4ba187f5 --- /dev/null +++ b/src/commons/index/dart/index/idioms/include/idioms_local_index.h @@ -0,0 +1,154 @@ +#ifndef IDIOMS_LOCAL_INDEX_H +#define IDIOMS_LOCAL_INDEX_H + +#include "pdc_public.h" +#include "pdc_config.h" +#include "query_utils.h" +#include "timer_utils.h" +#include "pdc_generic.h" +#include "art.h" +#include "pdc_set.h" +#include "pdc_hash.h" +#include "pdc_compare.h" +#include "dart_core.h" +// #include "pdc_hash_table.h" +#include "bulki_serde.h" +#include "rbtree.h" + +/** + * 2024-03-07: TODO items + * 1. Debugging the Index persistence mechanism + * 2. Make sure every IDIOMS API returns a struct that contains the number of index items. 
+ */ + +typedef struct { + art_tree *art_key_prefix_tree_g; + art_tree *art_key_suffix_tree_g; + DART * dart_info_g; + uint32_t server_id_g; + uint32_t num_servers_g; + int64_t index_record_count_g; + int64_t search_request_count_g; + int64_t insert_request_count_g; + int64_t delete_request_count_g; + double time_to_create_index_g; + double time_to_search_index_g; + double time_to_delete_index_g; +} IDIOMS_t; + +typedef struct { + char * key; + int8_t is_key_suffix; + uint64_t virtual_node_id; + pdc_c_var_type_t type; + // int simple_value_type; // 0: uint64_t, 1: int64_t, 2: double, 3: char* + void * value; + size_t value_len; + uint64_t *obj_ids; + size_t num_obj_ids; + size_t key_offset; + size_t value_offset; + uint32_t src_client_id; +} IDIOMS_md_idx_record_t; + +typedef struct { + uint64_t virtural_node_id; + size_t indexed_item_count; + // pdc_c_var_type_t type; + // int simple_value_type; // 0: uint64_t, 1: int64_t, 2: double, 3: char* + // Also, for key lookup ART, we also maintain the pointer to the value tree + art_tree *primary_trie; + art_tree *secondary_trie; + rbt_t * primary_rbt; + rbt_t * secondary_rbt; + uint8_t val_idx_dtype; // 0: uint64_t, 1: int64_t, 2: double +} key_index_leaf_content_t; + +typedef struct { + Set * obj_id_set; + size_t indexed_item_count; +} value_index_leaf_content_t; + +typedef struct { + void * value; + size_t value_len; + uint64_t *obj_ids; + size_t num_obj_ids; +} value_index_record_t; + +typedef struct { + value_index_record_t *value_idx_record; + uint64_t num_value_idx_record; + char * key; + uint64_t virtual_node_id; +} key_index_record_t; + +typedef struct { + void * buffer; + size_t buffer_size; + size_t buffer_capacity; + size_t num_keys; +} index_buffer_t; + +static void +_init_dart_space_via_idioms(DART *dart, int num_server) +{ + dart_space_init(dart, num_server); +} + +static void +_encodeTypeToBitmap(uint8_t *bitmap, enum pdc_c_var_type_t type) +{ + if (bitmap == NULL) { + return; + } + if (type >= PDC_STRING) 
{ // Non-numerical types + *bitmap |= ((type - PDC_STRING + 1) << 4); // Shift by 4 to set in the higher 4 bits + } + else { // Numerical types + *bitmap |= (type & 0x0F); // Ensure only lower 4 bits are used for numerical types + } +} + +// Function to get numerical type from the bitmap +static enum pdc_c_var_type_t +_getNumericalTypeFromBitmap(uint8_t bitmap) +{ + return (enum pdc_c_var_type_t)(bitmap & 0x0F); // Extract lower 4 bits +} + +// Function to get string (non-numerical) type from the bitmap +static enum pdc_c_var_type_t +_getCompoundTypeFromBitmap(uint8_t bitmap) +{ + return (enum pdc_c_var_type_t)(((bitmap >> 4) & 0x0F) + PDC_STRING - + 1); // Extract higher 4 bits and adjust index +} + +/** + * @brief Initialize the ART root index + */ +IDIOMS_t *IDIOMS_init(uint32_t server_id, uint32_t num_servers); + +/** + * @brief Create local index with the information in idx_record. + * @param idx_record Only the 'key', 'type' and 'value' and 'value_size' fields are used. + * @return perr_t SUCCESS on success, FAIL on failure + */ +perr_t idioms_local_index_create(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record); + +/** + * @brief Delete the local index with the information in idx_record. + * @param idx_record Only the 'key', 'type' and 'value' and 'value_size' fields are used. + * @return perr_t SUCCESS on success, FAIL on failure + */ +perr_t idioms_local_index_delete(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record); + +/** + * @brief Search the ART index for the given key. + * @param idx_record 'key', 'type' and 'value' and 'value_size' fields are used for input. + * @return the number of object IDs found. The object IDs are stored in the 'obj_id' field of the idx_record. 
+ */ +uint64_t idioms_local_index_search(IDIOMS_t *idioms, IDIOMS_md_idx_record_t *idx_record); + +#endif // IDIOMS_LOCAL_INDEX_H diff --git a/src/commons/index/dart/index/idioms/include/idioms_persistence.h b/src/commons/index/dart/index/idioms/include/idioms_persistence.h new file mode 100644 index 000000000..8d5a413c1 --- /dev/null +++ b/src/commons/index/dart/index/idioms/include/idioms_persistence.h @@ -0,0 +1,32 @@ +#ifndef IDIOMS_PERSISTENCE_H +#define IDIOMS_PERSISTENCE_H + +#include "idioms_local_index.h" + +/** + * @brief Dumping the index to a file. + * @param dir_path The directory path to store the index file. + * @param serverID The server ID. + * @return perr_t SUCCESS on success, FAIL on failure + */ +perr_t idioms_metadata_index_dump(IDIOMS_t *idioms, char *dir_path, uint32_t serverID); + +/** + * @brief Recovering the index from a file. Please initialize idioms before calling this function. + * @param dir_path The directory path to store the index file. + * @param num_server The number of servers. + * @param serverID The server ID. + * @return perr_t SUCCESS on success, FAIL on failure + */ +perr_t idioms_metadata_index_recover(IDIOMS_t *idioms, char *dir_path, int num_server, uint32_t serverID); + +// /** +// * @brief Initialize the DART space via idioms. +// * @param dart The DART space to be initialized. +// * @param num_client The number of clients. +// * @param num_server The number of servers. +// * @param max_server_num_to_adapt The maximum number of servers to adapt. 
+// */ +// void init_dart_space_via_idioms(DART *dart, int num_client, int num_server, int max_server_num_to_adapt); + +#endif // IDIOMS_PERSISTENCE_H \ No newline at end of file diff --git a/src/commons/logging/include/pdc_logger.h b/src/commons/logging/include/pdc_logger.h new file mode 100644 index 000000000..d7a6c110b --- /dev/null +++ b/src/commons/logging/include/pdc_logger.h @@ -0,0 +1,37 @@ +#ifndef PDC_LOGGER_H +#define PDC_LOGGER_H + +#include +#include +#include +#include + +#define MAX_LOG_MSG_LENGTH 1024 +#define MAX_LOG_FILE_SIZE (10 * 1024 * 1024) // 10 MB +#define MAX_LOG_FILE_NAME_LENGTH 256 + +typedef enum { LOG_LEVEL_ERROR, LOG_LEVEL_WARNING, LOG_LEVEL_INFO, LOG_LEVEL_DEBUG } PDC_LogLevel; + +static FILE * logFiles[4] = {NULL}; // Log files for each log level +static char logFilenames[4][MAX_LOG_FILE_NAME_LENGTH]; +static PDC_LogLevel logLevel = LOG_LEVEL_INFO; + +void setLogFile(PDC_LogLevel level, const char *fileName); + +void setLogLevel(PDC_LogLevel level); + +void log_message(PDC_LogLevel level, const char *format, ...); + +void log_message_nlf(PDC_LogLevel level, const char *format, ...); + +#define LOG_ERROR(format, ...) log_message(LOG_LEVEL_ERROR, format, ##__VA_ARGS__) +#define LOG_WARNING(format, ...) log_message(LOG_LEVEL_WARNING, format, ##__VA_ARGS__) +#define LOG_INFO(format, ...) log_message(LOG_LEVEL_INFO, format, ##__VA_ARGS__) +#define LOG_DEBUG(format, ...) log_message(LOG_LEVEL_DEBUG, format, ##__VA_ARGS__) + +#define NLF_LOG_ERROR(format, ...) log_message_nlf(LOG_LEVEL_ERROR, format, ##__VA_ARGS__) +#define NLF_LOG_WARNING(format, ...) log_message_nlf(LOG_LEVEL_WARNING, format, ##__VA_ARGS__) +#define NLF_LOG_INFO(format, ...) log_message_nlf(LOG_LEVEL_INFO, format, ##__VA_ARGS__) +#define NLF_LOG_DEBUG(format, ...) 
log_message_nlf(LOG_LEVEL_DEBUG, format, ##__VA_ARGS__) + +#endif // PDC_LOGGER_H \ No newline at end of file diff --git a/src/commons/logging/pdc_logger.c b/src/commons/logging/pdc_logger.c new file mode 100644 index 000000000..6952f44bc --- /dev/null +++ b/src/commons/logging/pdc_logger.c @@ -0,0 +1,123 @@ +#include "pdc_logger.h" +#include +#include +#include + +void +setLogFile(PDC_LogLevel level, const char *fileName) +{ + + if (logFiles[level] && logFiles[level] != stdout && logFiles[level] != stderr) { + fclose(logFiles[level]); + } + if (fileName) { + if (strcmp(fileName, "stderr") == 0) { + logFiles[level] = stderr; + } + else if (strcmp(fileName, "stdout") == 0) { + logFiles[level] = stdout; + } + else { + strncpy(logFilenames[level], fileName, sizeof(logFilenames[level]) - 1); + logFilenames[level][sizeof(logFilenames[level]) - 1] = '\0'; + logFiles[level] = fopen(fileName, "a"); + } + } + else { + logFiles[level] = stdout; + } +} + +void +setLogLevel(PDC_LogLevel level) +{ + logLevel = level; +} + +void +rotate_log_file(PDC_LogLevel level) +{ + if (logFiles[level]) { + if (logFiles[level] == stdout || logFiles[level] == stderr) { + return; // for stdout and stderr, we don't rotate + } + fclose(logFiles[level]); + logFiles[level] = NULL; + } + + char newFilename[MAX_LOG_FILE_NAME_LENGTH]; + char timeStr[20]; + time_t rawtime = time(NULL); + struct tm *timeinfo = localtime(&rawtime); + + strftime(timeStr, 20, "%Y%m%d%H:%M:%S", timeinfo); + newFilename[strlen(newFilename) - 1] = '\0'; // Remove trailing newline + + snprintf(newFilename, MAX_LOG_FILE_NAME_LENGTH, "%s_%s", logFilenames[level], timeStr); + rename(logFilenames[level], newFilename); + logFiles[level] = fopen(logFilenames[level], "a"); +} + +void +_log_message(int lf, PDC_LogLevel level, const char *format, va_list args) +{ + if (level > logLevel) { + return; + } + + char prefix[16]; + switch (level) { + case LOG_LEVEL_ERROR: + strcpy(prefix, "ERROR"); + break; + case LOG_LEVEL_WARNING: + 
strcpy(prefix, "WARNING"); + break; + case LOG_LEVEL_INFO: + strcpy(prefix, "INFO"); + break; + case LOG_LEVEL_DEBUG: + strcpy(prefix, "DEBUG"); + break; + } + + char *log_format = (lf == 1) ? "[%s.%06ld] [%s] %s\n" : "[%s.%06ld] [%s] %s"; + + char *message = (char *)calloc(MAX_LOG_MSG_LENGTH + 1, sizeof(char)); + vsnprintf(message, MAX_LOG_MSG_LENGTH, format, args); + + struct timeval tv; + gettimeofday(&tv, NULL); + struct tm *timeinfo = localtime(&tv.tv_sec); + char * timestr = (char *)calloc(20, sizeof(char)); + strftime(timestr, 19, "%Y-%m-%d %H:%M:%S", timeinfo); + + // Rotate log file if it exceeds the maximum size, but this doesn't apply to stdout and stderr + if (logFiles[level] != stdout && logFiles[level] != stderr) { + struct stat st; + stat(logFilenames[level], &st); + if (st.st_size >= MAX_LOG_FILE_SIZE) { + rotate_log_file(level); + } + } + FILE *logFile = logFiles[level] ? logFiles[level] : stdout; + fprintf(logFile, log_format, timestr, tv.tv_usec, prefix, message); +} + +void +log_message_nlf(PDC_LogLevel level, const char *format, ...) +{ + va_list args; + va_start(args, format); + _log_message(0, level, format, args); + va_end(args); +} + +void +log_message(PDC_LogLevel level, const char *format, ...) 
+{ + va_list args; + va_start(args, format); + _log_message(1, level, format, args); + va_end(args); +} \ No newline at end of file diff --git a/src/commons/query/include/qc_parser.h b/src/commons/query/include/qc_parser.h new file mode 100644 index 000000000..f8730b035 --- /dev/null +++ b/src/commons/query/include/qc_parser.h @@ -0,0 +1,56 @@ +#ifndef QC_PARSER_H +#define QC_PARSER_H + +#include +#include +#include +#include "string_utils.h" + +typedef enum { NONE = 0, OR = 1, AND = 2, NOT = 3 } logical_op_t; + +typedef enum { FACTOR = 0, TERM = 1, EXPRESSION = 2 } condition_type_t; + +static const char *logicalOpStr[] = {NULL, "OR", "AND", "NOT"}; + +typedef struct subClause { + int start; + int end; + int level; + struct subClause *subClauseArray; + int numSubClauses; +} subClause; + +/** + * 1. Factor -> left and right are NULL && op is NONE. if the corresponding string contains 'AND'/'OR' then it + * is a string of expression. + * 2. Term -> either left or right must be factor, the other can be a factor or a term + * 3. 
Expression -> either left or right must be a term, the other can be a term or an expression + */ +typedef struct Condition { + int start; + int end; + int level; + condition_type_t type; + struct Condition *left; + struct Condition *right; + logical_op_t op; +} Condition; + +int isFactor(char *expression, Condition *condition); +int isTerm(char *expression, Condition *condition); +int isExpression(char *expression, Condition *condition); +int isExpressionStringFactor(char *expression, Condition *condition); + +int extractExpression(char *expression, Condition *condition); + +int extractTerm(char *expression, Condition *condition); + +int extractFactor(char *expression, Condition *condition); + +int splitCondition(char *expression, Condition *condition); + +int subClauseExtractor(char *expression, int start, int end, int level, subClause **subPtr); + +void printSubClauses(char *expression, subClause *subPtr, int numSubClauses); + +#endif // QC_PARSER_H \ No newline at end of file diff --git a/src/commons/query/qc_parser.c b/src/commons/query/qc_parser.c new file mode 100644 index 000000000..2a1b21ec5 --- /dev/null +++ b/src/commons/query/qc_parser.c @@ -0,0 +1,250 @@ +#include "qc_parser.h" + +void +printSubExpression(char *expression, Condition *condition) +{ + if (condition == NULL) { + printf("NULL\n"); + return; + } + printf("level %d, logical op: %s, start: %d, end: %d ", condition->level, logicalOpStr[condition->op], + condition->start, condition->end); + printf("["); + for (int i = condition->start; i < condition->end; i++) { + printf("%c", expression[i]); + } + printf("]\n"); + printf("left:"); + printSubExpression(expression, condition->left); + printf("right:"); + printSubExpression(expression, condition->right); +} + +Condition * +createSubCondition(int start, int end, int level) +{ + Condition *condition = (Condition *)malloc(sizeof(Condition)); + condition->start = start; + condition->end = end; + condition->level = level; + return condition; +} + +int 
+isFactor(char *expression, Condition *condition) +{ + if (condition->start == condition->end) { + return 0; + } + // if the string contains matching pairs of '(' and ')' then it is a string of expression + int pLevel = 0; + for (int i = condition->start; i < condition->end; i++) { + if (expression[i] == '(') { + pLevel++; + } + if (expression[i] == ')') { + pLevel--; + if (pLevel == 0) { + return 1; + } + } + } + + // if the string contains 'AND'/'OR' then it is not a factor + for (int i = condition->start; i < condition->end; i++) { + if (expression[i] == 'A' && expression[i + 1] == 'N' && expression[i + 2] == 'D') { + return 0; + } + if (expression[i] == 'O' && expression[i + 1] == 'R') { + return 0; + } + } + return 1; +} + +int +isTerm(char *expression, Condition *condition) +{ + if (condition->start == condition->end) { + return 0; + } + // if the string contains matching pairs of '(' and ')' then it is a string of expression + int pLevel = 0; + for (int i = condition->start; i < condition->end; i++) { + if (expression[i] == '(') { + pLevel++; + } + if (expression[i] == ')') { + pLevel--; + } + } + if (pLevel == 0) { + return 1; + } + // if the string contains 'AND'/'OR' then it is not a factor + for (int i = condition->start; i < condition->end; i++) { + if (expression[i] == 'A' && expression[i + 1] == 'N' && expression[i + 2] == 'D') { + return 1; + } + } + return 0; +} +int isExpression(char *expression, Condition *condition); +int isExpressionStringFactor(char *expression, Condition *condition); + +int +extractExpression(char *expression, Condition *condition) +{ + int levelCounter = condition->level; + int i1 = condition->start; + int i2 = i1 + 1; + int rst = -1; + while (i2 < condition->end) { + int l_start = condition->start; + int l_end = condition->end; + int r_start = condition->start; + int r_end = condition->end; + logical_op_t op; + + if (expression[i1] == '(') { + levelCounter++; + } + if (expression[i1] == ')') { + levelCounter--; + } + // 
printf("i1: %d, i2: %d, levelCounter: %d\n", i1, i2, levelCounter); + if (levelCounter == condition->level) { + int conditionFound = 0; + if (expression[i1] == 'O' && expression[i2] == 'R') { // FOUND OR on the same level + l_start = condition->start; + l_end = i1 - 1; + r_start = i2 + 1; + r_end = condition->end; + op = OR; + conditionFound = 1; + } + if (conditionFound) { + condition->left = createSubCondition(l_start, l_end, condition->level); + condition->right = createSubCondition(r_start, r_end, condition->level); + condition->op = op; + + printSubExpression(expression, condition); + splitCondition(expression, condition->left); + splitCondition(expression, condition->right); + } + } + i1++; + i2 = i1 + 1; + rst = 0; + } + return rst; +} + +int extractTerm(char *expression, Condition *condition); + +int extractFactor(char *expression, Condition *condition); + +int +splitCondition(char *expression, Condition *condition) +{ + int levelCounter = condition->level; + int i1 = condition->start; + int i2 = i1 + 1; + int rst = -1; + while (i2 < condition->end) { + int l_start = condition->start; + int l_end = condition->end; + int r_start = condition->start; + int r_end = condition->end; + logical_op_t op; + + if (expression[i1] == '(') { + levelCounter++; + } + if (expression[i1] == ')') { + levelCounter--; + } + // printf("i1: %d, i2: %d, levelCounter: %d\n", i1, i2, levelCounter); + if (levelCounter == condition->level) { + int conditionFound = 0; + if (expression[i1] == 'O' && expression[i2] == 'R') { // FOUND OR on the same level + l_start = condition->start; + l_end = i1 - 1; + r_start = i2 + 1; + r_end = condition->end; + op = OR; + conditionFound = 1; + } + if (expression[i1] == 'A' && expression[i2] == 'N' && + expression[i2 + 1] == 'D') { // FOUND AND on the same level + l_start = condition->start; + l_end = i1 - 1; + r_start = i2 + 2; + r_end = condition->end; + op = AND; + conditionFound = 1; + } + if (conditionFound) { + condition->left = 
createSubCondition(l_start, l_end, condition->level); + condition->right = createSubCondition(r_start, r_end, condition->level); + condition->op = op; + + printSubExpression(expression, condition); + splitCondition(expression, condition->left); + splitCondition(expression, condition->right); + } + } + i1++; + i2 = i1 + 1; + rst = 0; + } + return rst; +} + +int +subClauseExtractor(char *expression, int start, int end, int level, subClause **subPtr) +{ + int levelCounter = level; + *subPtr = NULL; // Initially, there are no subclauses. + int numSubClauses = 0; + + for (int i = start; i < end; i++) { + if (expression[i] == '(') { + levelCounter++; + if (levelCounter == level + 1) { + // Allocate or reallocate space for a new subclause. + *subPtr = (subClause *)realloc(*subPtr, sizeof(subClause) * (numSubClauses + 1)); + subClause *newClause = &((*subPtr)[numSubClauses]); + newClause->start = i + 1; + newClause->level = levelCounter; + newClause->subClauseArray = NULL; // Important to initialize. 
+ } + } + else if (expression[i] == ')') { + levelCounter--; + if (levelCounter == level) { + subClause *newClause = &((*subPtr)[numSubClauses]); + newClause->end = i - 1; + newClause->numSubClauses = subClauseExtractor(expression, newClause->start, newClause->end, + newClause->level, &newClause->subClauseArray); + numSubClauses++; + } + } + } + return numSubClauses; +} + +void +printSubClauses(char *expression, subClause *subPtr, int numSubClauses) +{ + for (int i = 0; i < numSubClauses; i++) { + printf("SubClause %d: Level: %d, Expression: ", i, subPtr[i].level); + for (int j = subPtr[i].start; j <= subPtr[i].end; j++) { + printf("%c", expression[j]); + } + printf("\n"); + if (subPtr[i].numSubClauses > 0) { + printSubClauses(expression, subPtr[i].subClauseArray, subPtr[i].numSubClauses); + } + } +} \ No newline at end of file diff --git a/src/commons/query/qc_parser_test.c b/src/commons/query/qc_parser_test.c new file mode 100644 index 000000000..36babbf68 --- /dev/null +++ b/src/commons/query/qc_parser_test.c @@ -0,0 +1,13 @@ +#include "qc_parser.h" + +int +main() +{ + char *expression = + "z=\"tsa\" AND t=234234 OR NOT (e=234 AND (a=123 OR b=\"abc\") AND ((NOT c=987) OR d=258))"; + Condition *root = (Condition *)malloc(sizeof(Condition)); + root->start = 0; + root->end = strlen(expression); + root->level = 0; + splitCondition(expression, root); +} \ No newline at end of file diff --git a/src/commons/serde/bulki.c b/src/commons/serde/bulki.c index a60441f31..de9139d6d 100644 --- a/src/commons/serde/bulki.c +++ b/src/commons/serde/bulki.c @@ -1,115 +1,595 @@ #include "bulki.h" +size_t +get_BULKI_Entity_size(BULKI_Entity *bulk_entity) +{ + if (bulk_entity == NULL) { + return 0; + } + size_t size = sizeof(int8_t) * 2 + sizeof(uint64_t) * 2; + if (bulk_entity->pdc_class == PDC_CLS_ARRAY) { + if (bulk_entity->pdc_type == PDC_BULKI) { + BULKI *bulki_array = (BULKI *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + size += 
get_BULKI_size(&bulki_array[i]); + } + } + else if (bulk_entity->pdc_type == PDC_BULKI_ENT) { + BULKI_Entity *bulki_entity_array = (BULKI_Entity *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + size += get_BULKI_Entity_size(&bulki_entity_array[i]); + } + } + else { + size += get_size_by_class_n_type(bulk_entity->data, bulk_entity->count, bulk_entity->pdc_class, + bulk_entity->pdc_type); + } + } + else if (bulk_entity->pdc_class == PDC_CLS_ITEM) { + if (bulk_entity->pdc_type == PDC_BULKI) { + size += get_BULKI_size((BULKI *)bulk_entity->data); + } + if (bulk_entity->pdc_type == PDC_BULKI_ENT) { + size += get_BULKI_Entity_size((BULKI_Entity *)bulk_entity->data); + } + else { + size += get_size_by_class_n_type(bulk_entity->data, bulk_entity->count, bulk_entity->pdc_class, + bulk_entity->pdc_type); + } + } + bulk_entity->size = size; + return size; +} + +size_t +get_BULKI_size(BULKI *bulki) +{ + if (bulki == NULL) { + return 0; + } + size_t size = sizeof(uint64_t) * 6; // totalSize + numKeys + headerSize + dataSize + offsets * 2; + for (size_t i = 0; i < bulki->numKeys; i++) { + size += get_BULKI_Entity_size(&bulki->header->keys[i]); + size += get_BULKI_Entity_size(&bulki->data->values[i]); + } + bulki->totalSize = size; + return size; +} + +void +BULKI_Entity_print(BULKI_Entity *bulk_entity) +{ + if (bulk_entity == NULL) { + printf("Error: bulki_entity is NULL\n"); + return; + } + printf("BULKI_Entity:\n"); + printf("pdc_class: %d\n", bulk_entity->pdc_class); + printf("pdc_type: %d\n", bulk_entity->pdc_type); + printf("count: %zu\n", bulk_entity->count); + printf("size: %zu\n", bulk_entity->size); + if (bulk_entity->pdc_class == PDC_CLS_ARRAY) { + if (bulk_entity->pdc_type == PDC_BULKI) { + BULKI *bulki_array = (BULKI *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + printf("BULKI[%zu]:\n", i); + BULKI_print(&bulki_array[i]); + } + } + else if (bulk_entity->pdc_type == PDC_BULKI_ENT) { + BULKI_Entity 
*bulki_entity_array = (BULKI_Entity *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + printf("BULKI_Entity[%zu]:\n", i); + BULKI_Entity_print(&bulki_entity_array[i]); + } + } + else { + printf("BULKI_Entity[%zu]:\n", bulk_entity->count); + for (size_t i = 0; i < bulk_entity->count; i++) { + printf("%s : ", DataTypeNames[bulk_entity->pdc_type]); + } + printf("\n"); + } + } + else if (bulk_entity->pdc_class == PDC_CLS_ITEM) { + if (bulk_entity->pdc_type == PDC_BULKI) { + printf("BULKI:\n"); + BULKI_print((BULKI *)bulk_entity->data); + } + else { + printf("%s\n", DataTypeNames[bulk_entity->pdc_type]); + } + } +} + +void +BULKI_print(BULKI *bulki) +{ + if (bulki == NULL) { + printf("Error: bulki is NULL\n"); + return; + } + printf("BULKI:\n"); + printf("totalSize: %zu\n", bulki->totalSize); + printf("numKeys: %zu\n", bulki->numKeys); + printf("headerSize: %zu\n", bulki->header->headerSize); + printf("dataSize: %zu\n", bulki->data->dataSize); + for (size_t i = 0; i < bulki->numKeys; i++) { + printf("key[%zu]:\n", i); + BULKI_Entity_print(&bulki->header->keys[i]); + printf("value[%zu]:\n", i); + BULKI_Entity_print(&bulki->data->values[i]); + } +} + +BULKI_Entity * +empty_BULKI_Array_Entity() +{ + BULKI_Entity *bulki_entity = (BULKI_Entity *)calloc(1, sizeof(BULKI_Entity)); + bulki_entity->pdc_type = PDC_BULKI; + bulki_entity->pdc_class = PDC_CLS_ARRAY; + bulki_entity->count = 0; + bulki_entity->data = NULL; + get_BULKI_Entity_size(bulki_entity); + return bulki_entity; +} + +BULKI_Entity * +empty_Bent_Array_Entity() +{ + BULKI_Entity *bulki_entity = (BULKI_Entity *)calloc(1, sizeof(BULKI_Entity)); + bulki_entity->pdc_type = PDC_BULKI_ENT; + bulki_entity->pdc_class = PDC_CLS_ARRAY; + bulki_entity->count = 0; + bulki_entity->data = NULL; + get_BULKI_Entity_size(bulki_entity); + return bulki_entity; +} + +BULKI_Entity * +BULKI_ENTITY_append_BULKI(BULKI_Entity *dest, BULKI *src) +{ + if (src == NULL || dest == NULL) { + printf("Error: bulki is 
NULL\n"); + return NULL; + } + if (dest->pdc_class != PDC_CLS_ARRAY || dest->pdc_type != PDC_BULKI) { + printf("Error: dest is not an array of BULKI structure\n"); + return NULL; + } + dest->count = dest->count + 1; + dest->data = realloc(dest->data, dest->count * sizeof(BULKI)); + memcpy(dest->data + (dest->count - 1) * sizeof(BULKI), src, sizeof(BULKI)); + get_BULKI_Entity_size(dest); + return dest; +} + BULKI * -pdc_serde_init(int initial_field_count) +BULKI_ENTITY_get_BULKI(BULKI_Entity *bulki_entity, size_t idx) +{ + if (bulki_entity == NULL) { + printf("Error: bulki_entity is NULL\n"); + return NULL; + } + if (bulki_entity->pdc_class != PDC_CLS_ARRAY || bulki_entity->pdc_type != PDC_BULKI) { + printf("Error: bulki_entity is not an array of BULKI structure\n"); + return NULL; + } + if (idx >= bulki_entity->count) { + printf("idx = %d, count = %d Warning: index for bulki_entity is out of bound\n", idx, + bulki_entity->count); + return NULL; + } + return &((BULKI *)bulki_entity->data)[idx]; +} + +BULKI_Entity * +BULKI_ENTITY_append_BULKI_Entity(BULKI_Entity *dest, BULKI_Entity *src) +{ + if (src == NULL || dest == NULL) { + printf("Error: bulki is NULL\n"); + return NULL; + } + if (dest->pdc_class != PDC_CLS_ARRAY || dest->pdc_type != PDC_BULKI_ENT) { + printf("Error: dest is not an array of BULKI_Entity structure\n"); + return NULL; + } + dest->count = dest->count + 1; + dest->data = realloc(dest->data, dest->count * sizeof(BULKI_Entity)); + memcpy(dest->data + (dest->count - 1) * sizeof(BULKI_Entity), src, sizeof(BULKI_Entity)); + get_BULKI_Entity_size(dest); + return dest; +} + +BULKI_Entity * +BULKI_ENTITY_get_BULKI_Entity(BULKI_Entity *bulki_entity, size_t idx) { - BULKI *data = malloc(sizeof(BULKI)); - data->numKeys = initial_field_count; - data->header = malloc(sizeof(BULKI_Header)); - data->header->keys = malloc(sizeof(BULKI_Key) * initial_field_count); - data->header->totalSize = 0; - data->data = malloc(sizeof(BULKI_Data)); - data->data->values = 
malloc(sizeof(BULKI_Value) * initial_field_count); - data->data->totalSize = 0; - return data; + if (bulki_entity == NULL) { + printf("Error: bulki_entity is NULL\n"); + return NULL; + } + if (bulki_entity->pdc_class != PDC_CLS_ARRAY || bulki_entity->pdc_type != PDC_BULKI_ENT) { + printf("Error: bulki_entity is not an array of BULKI_Entity structure\n"); + return NULL; + } + if (idx >= bulki_entity->count) { + // printf("idx = %d, count = %d Warning: index for bulki_entity is out of bound\n", idx, + // bulki_entity->count); + return NULL; + } + return &((BULKI_Entity *)bulki_entity->data)[idx]; +} + +BULKI_Entity * +BULKI_ENTITY(void *data, uint64_t count, pdc_c_var_type_t pdc_type, pdc_c_var_class_t pdc_class) +{ + if (pdc_type == PDC_BULKI_ENT && pdc_class == PDC_CLS_ITEM) { + printf("Error: BULKI_Entity cannot be an single item in another BULKI_Entity\n"); + return NULL; + } + BULKI_Entity *bulki_entity = (BULKI_Entity *)calloc(1, sizeof(BULKI_Entity)); + bulki_entity->pdc_type = pdc_type; + bulki_entity->pdc_class = pdc_class; + bulki_entity->count = (pdc_class == PDC_CLS_ITEM) ? 
1 : count; + size_t size = get_size_by_class_n_type(data, count, pdc_class, pdc_type); + if (pdc_type == PDC_BULKI) { + size = sizeof(BULKI) * bulki_entity->count; + bulki_entity->data = data; + } + else if (pdc_type == PDC_BULKI_ENT) { + size = sizeof(BULKI_Entity) * bulki_entity->count; + bulki_entity->data = data; + } + else { + bulki_entity->data = calloc(1, size); + memcpy(bulki_entity->data, data, size); + } + + get_BULKI_Entity_size(bulki_entity); + return bulki_entity; +} + +BULKI_Entity * +BULKI_singleton_ENTITY(void *data, pdc_c_var_type_t pdc_type) +{ + return BULKI_ENTITY(data, 1, pdc_type, PDC_CLS_ITEM); +} + +BULKI_Entity * +BULKI_array_ENTITY(void *data, uint64_t count, pdc_c_var_type_t pdc_type) +{ + return BULKI_ENTITY(data, count, pdc_type, PDC_CLS_ARRAY); +} + +BULKI * +BULKI_init(int initial_field_count) +{ + BULKI *buiki = calloc(1, sizeof(BULKI)); + buiki->numKeys = 0; + buiki->capacity = initial_field_count; + buiki->header = calloc(1, sizeof(BULKI_Header)); + buiki->header->keys = calloc(buiki->capacity, sizeof(BULKI_Entity)); + buiki->header->headerSize = 0; + buiki->data = calloc(1, sizeof(BULKI_Data)); + buiki->data->values = calloc(buiki->capacity, sizeof(BULKI_Entity)); + buiki->data->dataSize = 0; + get_BULKI_size(buiki); + return buiki; +} + +int +BULKI_Entity_equal(BULKI_Entity *be1, BULKI_Entity *be2) +{ + int meta_equal = be1->pdc_type == be2->pdc_type && be1->pdc_class == be2->pdc_class && + be1->count == be2->count && be1->size == be2->size; + if (!meta_equal) { + // printf("Error: be1 and be2 are not equal in terms of metadata\n"); + return 0; + } + if (be1->pdc_class == PDC_CLS_ARRAY) { + if (be1->pdc_type == PDC_BULKI) { + BULKI *bulki_array1 = (BULKI *)be1->data; + BULKI *bulki_array2 = (BULKI *)be2->data; + for (size_t i = 0; i < be1->count; i++) { + if (!BULKI_equal(&bulki_array1[i], &bulki_array2[i])) { + // printf("Error: be1 and be2 are not equal in terms of BULKI data in the array\n"); + return 0; + } + } + } + else if 
(be1->pdc_type == PDC_BULKI_ENT) { + BULKI_Entity *bulki_entity_array1 = (BULKI_Entity *)be1->data; + BULKI_Entity *bulki_entity_array2 = (BULKI_Entity *)be2->data; + for (size_t i = 0; i < be1->count; i++) { + if (!BULKI_Entity_equal(&bulki_entity_array1[i], &bulki_entity_array2[i])) { + // printf("Error: be1 and be2 are not equal in terms of BULKI_Entity data in the + // array\n"); + return 0; + } + } + } + else { + if (memcmp(be1->data, be2->data, be1->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2) != 0) { + // printf("Error: be1 and be2 are not equal in terms of base type data in the array\n"); + return 0; + } + } + } + else if (be1->pdc_class == PDC_CLS_ITEM) { + if (be1->pdc_type == PDC_BULKI) { + if (!BULKI_equal((BULKI *)be1->data, (BULKI *)be2->data)) { + // printf("Error: be1 and be2 are not equal in terms of BULKI data\n"); + return 0; + } + } + else { + if (memcmp(be1->data, be2->data, be1->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2) != 0) { + // printf("Error: be1 and be2 are not equal in terms of base type data\n"); + return 0; + } + } + } + return 1; +} + +int +BULKI_equal(BULKI *bulki1, BULKI *bulki2) +{ + if (bulki1->numKeys != bulki2->numKeys || bulki1->totalSize != bulki2->totalSize || + bulki1->header->headerSize != bulki2->header->headerSize || + bulki1->data->dataSize != bulki2->data->dataSize) { + printf("Error: bulki1 and bulki2 are not equal in terms of metadata\n"); + return 0; + } + for (size_t i = 0; i < bulki1->numKeys; i++) { + if (!BULKI_Entity_equal(&bulki1->header->keys[i], &bulki2->header->keys[i]) || + !BULKI_Entity_equal(&bulki1->data->values[i], &bulki2->data->values[i])) { + printf("Error: bulki1 and bulki2 are not equal in terms of data\n"); + return 0; + } + } + return 1; } void -pdc_serde_append_key_value(BULKI *data, BULKI_Key *key, BULKI_Value *value) +BULKI_put(BULKI *bulki, BULKI_Entity *key, BULKI_Entity *value) { - data->header->keys[data->numKeys] = *key; + if (bulki == NULL || key == NULL || value == NULL) 
{ + printf("Error: bulki, key, or value is NULL\n"); + return; + } + // search for existing key + BULKI_Entity *existing_value = BULKI_get(bulki, key); + if (existing_value != NULL) { + bulki->header->headerSize -= key->size; + bulki->data->dataSize -= existing_value->size; + memcpy(existing_value, value, sizeof(BULKI_Entity)); + bulki->header->headerSize += key->size; + bulki->data->dataSize += value->size; + get_BULKI_size(bulki); + return; + } + if (bulki->numKeys >= bulki->capacity) { + bulki->capacity *= 2; + bulki->header->keys = realloc(bulki->header->keys, bulki->capacity * sizeof(BULKI_Entity)); + bulki->data->values = realloc(bulki->data->values, bulki->capacity * sizeof(BULKI_Entity)); + } + memcpy(&bulki->header->keys[bulki->numKeys], key, sizeof(BULKI_Entity)); // append bytes for type, size, and key - data->header->totalSize += (sizeof(uint8_t) + sizeof(uint64_t) + key->size); + bulki->header->headerSize += key->size; - data->data->values[data->numValues] = *value; + memcpy(&bulki->data->values[bulki->numKeys], value, sizeof(BULKI_Entity)); // append bytes for class, type, size, and data - data->data->totalSize += (sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint64_t) + value->size); + bulki->data->dataSize += value->size; - data->numKeys++; - data->totalSize = data->header->totalSize + data->data->totalSize + sizeof(uint64_t) * 6; + bulki->numKeys++; + get_BULKI_size(bulki); } -BULKI_Key * -BULKI_KEY(void *key, pdc_c_var_type_t pdc_type, uint64_t size) +BULKI_Entity * +BULKI_delete(BULKI *bulki, BULKI_Entity *key) { - BULKI_Key *pdc_key = (BULKI_Key *)malloc(sizeof(BULKI_Key)); - uint64_t key_size = (uint64_t)get_size_by_class_n_type(key, size, PDC_CLS_SCALAR, pdc_type); - pdc_key->key = malloc(key_size); - memcpy(pdc_key->key, key, key_size); - pdc_key->pdc_type = pdc_type; - pdc_key->size = key_size; - return pdc_key; + BULKI_Entity *value = NULL; + for (size_t i = 0; i < bulki->numKeys; i++) { + if (BULKI_Entity_equal(&bulki->header->keys[i], key)) 
{ + value = &bulki->data->values[i]; + bulki->header->headerSize -= key->size; + bulki->data->dataSize -= value->size; + bulki->numKeys--; + memcpy(&bulki->header->keys[i], &bulki->header->keys[bulki->numKeys - 1], sizeof(BULKI_Entity)); + memcpy(&bulki->data->values[i], &bulki->data->values[bulki->numKeys - 1], sizeof(BULKI_Entity)); + break; + } + } + get_BULKI_size(bulki); + return value; } -BULKI_Value * -BULKI_VALUE(void *data, pdc_c_var_type_t pdc_type, pdc_c_var_class_t pdc_class, uint64_t size) +BULKI_Entity_Iterator * +Bent_iterator_init(BULKI_Entity *array, void *filter, pdc_c_var_type_t filter_type) { - BULKI_Value *pdc_value = (BULKI_Value *)malloc(sizeof(BULKI_Value)); - size_t value_size = size; - if (pdc_class == PDC_CLS_STRUCT) { - // we are postponing the serialization of the embedded SERDE_SerializedData, so no need to check the - // type here. - BULKI *struct_data = (BULKI *)data; - value_size = struct_data->totalSize; - pdc_value->data = data; + if (array == NULL || array->pdc_class != PDC_CLS_ARRAY) { + printf("Error: not a proper array\n"); + return NULL; + } + BULKI_Entity_Iterator *iter = (BULKI_Entity_Iterator *)calloc(1, sizeof(BULKI_Entity_Iterator)); + iter->entity_array = array; + iter->total_size = array->count; + iter->current_idx = 0; + + if (filter != NULL) { + if (array->pdc_type == filter_type) { + iter->bent_filter = filter; + iter->filter_type = filter_type; + } // if the filter is not appropriate, just ignore it. 
} - else if (pdc_class <= PDC_CLS_ARRAY) { - value_size = (size_t)get_size_by_class_n_type(data, size, pdc_class, pdc_type); - pdc_value->data = malloc(value_size); - memcpy(pdc_value->data, data, value_size); + return iter; +} + +int +Bent_iterator_has_next_BULKI(BULKI_Entity_Iterator *it) +{ + if (it->bent_filter == NULL) { + return it->current_idx < it->total_size; } else { - printf("Error: unsupported class %d\n", pdc_class); + while (it->current_idx < it->total_size) { + if (it->filter_type == PDC_BULKI) { + BULKI *array_content = (BULKI *)it->entity_array->data; + BULKI *current_bulki = &(array_content[it->current_idx]); + if (BULKI_equal(current_bulki, it->bent_filter)) { + return 1; + } + } + it->current_idx++; + } + } + return 0; +} + +int +Bent_iterator_has_next_Bent(BULKI_Entity_Iterator *it) +{ + if (it->bent_filter == NULL) { + return it->current_idx < it->total_size; + } + else { + while (it->current_idx < it->total_size) { + if (it->filter_type == PDC_BULKI_ENT) { + BULKI_Entity *array_content = (BULKI_Entity *)it->entity_array->data; + BULKI_Entity *current_entity = &(array_content[it->current_idx]); + if (BULKI_Entity_equal(current_entity, it->bent_filter)) { + return 1; + } + } + it->current_idx++; + } + } + return 0; +} + +BULKI * +Bent_iterator_next_BULKI(BULKI_Entity_Iterator *it) +{ + if (it->current_idx < it->total_size) { + if (it->entity_array->pdc_type == PDC_BULKI) { + BULKI *array_content = (BULKI *)it->entity_array->data; + return &(array_content[it->current_idx++]); + } + } + return NULL; +} + +BULKI_Entity * +Bent_iterator_next_Bent(BULKI_Entity_Iterator *it) +{ + if (it->current_idx < it->total_size) { + if (it->entity_array->pdc_type == PDC_BULKI_ENT) { + BULKI_Entity *array_content = (BULKI_Entity *)it->entity_array->data; + return &(array_content[it->current_idx++]); + } + } + return NULL; +} + +BULKI_KV_Pair_Iterator * +BULKI_KV_Pair_iterator_init(BULKI *bulki) +{ + if (bulki == NULL) { + printf("Error: bulki is NULL\n"); return 
NULL; } - pdc_value->pdc_class = pdc_class; - pdc_value->pdc_type = pdc_type; - pdc_value->size = value_size; - return pdc_value; + BULKI_KV_Pair_Iterator *iter = (BULKI_KV_Pair_Iterator *)calloc(1, sizeof(BULKI_KV_Pair_Iterator)); + iter->bulki = bulki; + iter->total_size = bulki->numKeys; + iter->current_idx = 0; + return iter; } -void -pdc_serde_free(BULKI *data) +int +BULKI_KV_Pair_iterator_has_next(BULKI_KV_Pair_Iterator *it) +{ + return it->current_idx < it->total_size; +} + +BULKI_KV_Pair * +BULKI_KV_Pair_iterator_next(BULKI_KV_Pair_Iterator *it) { - for (size_t i = 0; i < data->header->numKeys; i++) { - free(data->header->keys[i].key); + if (it->current_idx < it->total_size) { + BULKI_KV_Pair *pair = (BULKI_KV_Pair *)calloc(1, sizeof(BULKI_KV_Pair)); + pair->key = it->bulki->header->keys[it->current_idx]; + pair->value = it->bulki->data->values[it->current_idx]; + it->current_idx++; + return pair; } - free(data->header->keys); - for (size_t i = 0; i < data->data->numValues; i++) { - free(data->data->values[i].data); + return NULL; +} + +BULKI_Entity * +BULKI_get(BULKI *bulki, BULKI_Entity *key) +{ + for (size_t i = 0; i < bulki->numKeys; i++) { + if (BULKI_Entity_equal(&bulki->header->keys[i], key)) { + return &bulki->data->values[i]; + } } - free(data->data->values); - free(data->header); - free(data->data); - free(data); + return NULL; } void -pdc_serde_print(BULKI *data) -{ - printf("Header:\n"); - printf(" numKeys: %zu\n", data->header->numKeys); - printf(" totalSize: %zu\n", data->header->totalSize); - for (size_t i = 0; i < data->header->numKeys; i++) { - printf(" key %ld:\n", i); - printf(" type: %d\n", data->header->keys[i].pdc_type); - printf(" size: %zu\n", data->header->keys[i].size); - printf(" key: %s\n", (char *)data->header->keys[i].key); - } - printf("Data:\n"); - printf(" numValues: %zu\n", data->data->numValues); - printf(" totalSize: %zu\n", data->data->totalSize); - for (size_t i = 0; i < data->data->numValues; i++) { - printf(" value 
%ld:\n", i); - printf(" class: %d\n", data->data->values[i].pdc_class); - printf(" type: %d\n", data->data->values[i].pdc_type); - printf(" size: %zu\n", data->data->values[i].size); - printf(" data: "); - if (data->data->values[i].pdc_type == PDC_STRING) { - printf("%s\n", (char *)data->data->values[i].data); +BULKI_Entity_free(BULKI_Entity *bulk_entity, int free_struct) +{ + if (bulk_entity != NULL) { + if (bulk_entity->pdc_class == PDC_CLS_ARRAY) { + if (bulk_entity->pdc_type == PDC_BULKI) { + BULKI *bulki_array = (BULKI *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + BULKI_free(&bulki_array[i], 0); + } + } + else if (bulk_entity->pdc_type == PDC_BULKI_ENT) { + BULKI_Entity *bulki_entity_array = (BULKI_Entity *)bulk_entity->data; + for (size_t i = 0; i < bulk_entity->count; i++) { + BULKI_Entity_free(&bulki_entity_array[i], 0); + } + } + } + else if (bulk_entity->pdc_class == PDC_CLS_ITEM) { + if (bulk_entity->pdc_type == PDC_BULKI) { + BULKI_free((BULKI *)bulk_entity->data, 0); + } + } + free(bulk_entity->data); + if (free_struct) { + free(bulk_entity); + } + } +} // BULKI_Entity_free + +void +BULKI_free(BULKI *bulki, int free_struct) +{ + if (bulki != NULL) { + if (bulki->header != NULL) { + if (bulki->header->keys != NULL) { + for (size_t i = 0; i < bulki->numKeys; i++) { + BULKI_Entity_free(&bulki->header->keys[i], 0); + } + free(bulki->header->keys); + } + free(bulki->header); } - else { - printf("\n"); + if (bulki->data != NULL) { + if (bulki->data->values != NULL) { + for (size_t i = 0; i < bulki->numKeys; i++) { + BULKI_Entity_free(&bulki->data->values[i], 0); + } + free(bulki->data->values); + } + free(bulki->data); + } + if (free_struct) { + free(bulki); } } -} \ No newline at end of file +} // BULKI_free \ No newline at end of file diff --git a/src/commons/serde/bulki_serde.c b/src/commons/serde/bulki_serde.c index 3d6a0caf1..de6e908fd 100644 --- a/src/commons/serde/bulki_serde.c +++ b/src/commons/serde/bulki_serde.c @@ 
-1,229 +1,384 @@ #include "bulki_serde.h" -uint64_t -get_total_size_for_serialized_data(BULKI *data) -{ - - if (data->totalSize <= 0) { - size_t total_size = data->header->totalSize + data->data->totalSize + sizeof(uint64_t) * 6; - data->totalSize = total_size; - } - return data->totalSize; -} - // clang-format off /** * This function serializes the entire BULKI structure. * * The overview of the serialized binary data layout is: - * +---------------------+---------------------+---------------------+----------------------+----------------------+----------------------+ - * | Size of the Header | Size of the Data | Header Region | Data Offset | Data Region | Data Offset | - * | (uint64_t) | (uint64_t) | | (uint64_t) | | (uint64_t) | - * +---------------------+---------------------+---------------------+----------------------+----------------------+----------------------+ + * +---------------------+---------------------+---------------------+---------------------+---------------------+----------------------+ + * | Total Size | Number of Keys | Size of the Header | Size of the Data | Header Region | Data Region | + * | (uint64_t) | (uint64_t) | (uint64_t) | (uint64_t) | (uint64_t) | (uint64_t) | + * +---------------------+---------------------+---------------------+---------------------+---------------------+----------------------+ * - * The first 3 field is called meta-header, which provides metadata about size of the header region and the size of the data region. - * Note that the size of the header region doesn't include the 'Number of Keys' field. - * Also, the size of the data region doesn't include the 'Data Offset' field. + * The first 4 fields are called meta-header, which provides metadata about the total size of BULKI, number of keys, size of the header region and the size of the data region. 
* - * Then the following is the header region with two keys: - * +----------------------+-------------------------+-----------------------------+---------------------------+--------------------------+-----------------------------+---------------------------+ - * | Number of K-Vs | Key 1 Type | Key 1 Size | Key 1 Data | Key 2 Type | Key 2 Size | Key 2 Data | - * | (uint64_t) | (uint8_t) | (uint64_t) | (Variable size depending | (uint8_t) | (uint64_t) | (Variable size depending | - * | | | | on Key 1 Size) | | | on Key 2 Size) | - * +----------------------+-------------------------+-----------------------------+---------------------------+--------------------------+-----------------------------+---------------------------+ + * The header/data region contains multiple BULKI entities. + * Each BULKI entity contains the following fields: + * +-------------------------+-----------------------------+---------------------------+--------------------------+-----------------------------+ + * | size | Entitiy class | Entitiy type | count | data | + * | (uint64_t) | (uint8_t) | (uint8_t) | (uint64_t) | (Variable size depending | + * | | | | | on the type and class) | + * +-------------------------+-----------------------------+---------------------------+--------------------------+-----------------------------+ * - * Then, the following is the layout of the data region with the final offset validation point. 
- * - * |----------------------------------------------------------------------------------------------------------------| - * | Number of K-V Pairs (uint64_t) | Value 1 Class (uint8_t) | Value 1 Type (uint8_t) | Value 1 Size (uint64_t)| - * |----------------------------------------------------------------------------------------------------------------| - * | Value 1 Data (Variable size depending on Value 1 Size) | Value 2 Class (uint8_t) | Value 2 Type (uint8_t) | - * |----------------------------------------------------------------------------------------------------------------| - * | Value 2 Size (uint64_t) | Value 2 Data (Variable size depending on Value 2 Size) | - * |----------------------------------------------------------------------------------------------------------------| - * | ...repeated for the number of value entries in the data... | - * |----------------------------------------------------------------------------------------------------------------| - * | Final Data Offset (uint64_t) | - * |----------------------------------------------------------------------------------------------------------------| + * Note that the data field in the BULKI entity is a pointer to either an array of BULKI entities , an array of BULKI structures, an array of base type items, + * or a single item of BULKI, or a single item of a base type. + * + * After the header region, there is a data offset field, which is used to validate the header region. + * + * The data region contains multiple BULKI entities, each of which is a value entry. + * + * After the data region, there is another data offset field, which is used to validate the data region. * * Please refer to `get_size_by_class_n_type` function in pdc_generic.h for size calculation on scalar values and array values. + * + * For performance and simplicity, we do not recommend to use BULKI for large and deeply embedded data structures. 
* */ + // clang-format on + +/********************** Serialize ************************** */ + void * -BULKI_serde_serialize(BULKI *data) +BULKI_Entity_serialize_to_buffer(BULKI_Entity *entity, void *buffer, size_t *offset) { - // The buffer contains: - // the size of the header (size_t) + - // the size of the data (size_t) + - // the number of keys (size_t) + - // the header region + - // the data offset (size_t) + - // the number of value entries (size_t) + - // the data region - void *buffer = malloc(get_total_size_for_serialized_data(data)); - // serialize the meta header, which contains only the size of the header and the size of the data region. - memcpy(buffer, &data->header->totalSize, sizeof(size_t)); - memcpy(buffer + sizeof(size_t), &data->data->totalSize, sizeof(size_t)); + // printf("offset: %zu\n", *offset); + // serialize the size + uint64_t size = (uint64_t)get_BULKI_Entity_size(entity); + memcpy(buffer + *offset, &size, sizeof(uint64_t)); + *offset += sizeof(uint64_t); - // serialize the header - // start with the number of keys - memcpy(buffer + sizeof(size_t) * 2, &data->header->numKeys, sizeof(size_t)); - // then the keys - size_t offset = sizeof(size_t) * 3; - for (size_t i = 0; i < data->header->numKeys; i++) { - int8_t pdc_type = (int8_t)(data->header->keys[i].pdc_type); - memcpy(buffer + offset, &pdc_type, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(buffer + offset, &data->header->keys[i].size, sizeof(size_t)); - offset += sizeof(size_t); - memcpy(buffer + offset, data->header->keys[i].key, data->header->keys[i].size); - offset += data->header->keys[i].size; - } + // serialize the class + int8_t pdc_class = (int8_t)(entity->pdc_class); + memcpy(buffer + *offset, &pdc_class, sizeof(int8_t)); + *offset += sizeof(int8_t); + + // serialize the type + int8_t pdc_type = (int8_t)(entity->pdc_type); + memcpy(buffer + *offset, &pdc_type, sizeof(int8_t)); + *offset += sizeof(int8_t); - // serialize the data offset, this is for validation 
purpose to see if header region is corrupted. - memcpy(buffer + offset, &offset, sizeof(size_t)); - offset += sizeof(size_t); + // serialize the count + uint64_t count = (uint64_t)(entity->count); + memcpy(buffer + *offset, &count, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // printf("PRE-ser: size: %zu, class: %d, type: %d, count: %zu, offset: %zu\n", entity->size, + // entity->pdc_class, entity->pdc_type, entity->count, *offset); // serialize the data - // start with the number of value entries - memcpy(buffer + offset, &data->data->numValues, sizeof(size_t)); - offset += sizeof(size_t); - // then the values - for (size_t i = 0; i < data->data->numValues; i++) { - int8_t pdc_class = (int8_t)data->data->values[i].pdc_class; - int8_t pdc_type = (int8_t)data->data->values[i].pdc_type; - memcpy(buffer + offset, &pdc_class, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(buffer + offset, &pdc_type, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(buffer + offset, &data->data->values[i].size, sizeof(size_t)); - offset += sizeof(size_t); - - if (data->data->values[i].pdc_class == PDC_CLS_STRUCT) { - void *sdata = BULKI_serde_serialize((BULKI *)(data->data->values[i].data)); - memcpy(buffer + offset, sdata, data->data->values[i].size); + if (entity->pdc_class == PDC_CLS_ITEM) { + if (entity->pdc_type == PDC_BULKI) { // BULKI + BULKI *bulki = (BULKI *)(entity->data); + BULKI_serialize_to_buffer(bulki, buffer, offset); + } + else { // all base types + memcpy(buffer + *offset, entity->data, entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + *offset += (entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + } + } + else if (entity->pdc_class <= PDC_CLS_ARRAY) { + if (pdc_type == PDC_BULKI) { // BULKI + for (size_t i = 0; i < entity->count; i++) { + BULKI *bulki = ((BULKI *)entity->data) + i; + BULKI_serialize_to_buffer(bulki, buffer, offset); + } } - else if (data->data->values[i].pdc_class <= PDC_CLS_ARRAY) { - memcpy(buffer + offset, 
data->data->values[i].data, data->data->values[i].size); + else if (pdc_type == PDC_BULKI_ENT) { // BULKI_Entity + for (size_t i = 0; i < entity->count; i++) { + BULKI_Entity *bulki_entity = ((BULKI_Entity *)entity->data) + i; + BULKI_Entity_serialize_to_buffer(bulki_entity, buffer, offset); + } } - else { - printf("Error: unsupported class type %d\n", data->data->values[i].pdc_class); - return NULL; + else { // all base types + memcpy(buffer + *offset, entity->data, entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + *offset += (entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); } + } + else { + printf("Error: unsupported class type %d\n", entity->pdc_class); + return NULL; + } + + // printf("POST-ser: size: %zu, class: %d, type: %d, count: %zu, offset: %zu\n", entity->size, + // entity->pdc_class, entity->pdc_type, entity->count, *offset); + return buffer; +} + +void * +BULKI_Entity_serialize(BULKI_Entity *entity) +{ + void * buffer = calloc(1, get_BULKI_Entity_size(entity)); + size_t offset = 0; + BULKI_Entity_serialize_to_buffer(entity, buffer, &offset); + // printf("offset: %zu\n", offset); + return buffer; +} + +void * +BULKI_serialize_to_buffer(BULKI *bulki, void *buffer, size_t *offset) +{ + // serialize the total size + memcpy(buffer + *offset, &bulki->totalSize, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // serialize the number of keys + memcpy(buffer + *offset, &bulki->numKeys, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // serialize the header size + memcpy(buffer + *offset, &bulki->header->headerSize, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // serialize the data size + memcpy(buffer + *offset, &bulki->data->dataSize, sizeof(uint64_t)); + *offset += sizeof(uint64_t); - offset += data->data->values[i].size; - memcpy(buffer + offset, data->data->values[i].data, data->data->values[i].size); - offset += data->data->values[i].size; + // serialize the header + for (size_t i = 0; i < bulki->numKeys; i++) { 
+ BULKI_Entity_serialize_to_buffer(&(bulki->header->keys[i]), buffer, offset); + } + + // serialize the data offset + uint64_t ofst = (uint64_t)(*offset); + memcpy(buffer + *offset, &ofst, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // serialize the data + for (size_t i = 0; i < bulki->numKeys; i++) { + BULKI_Entity_serialize_to_buffer(&(bulki->data->values[i]), buffer, offset); } - // serialize the data offset again, this is for validation purpose to see if data region is corrupted. - memcpy(buffer + offset, &offset, sizeof(size_t)); - offset += sizeof(size_t); + + // serialize the data offset + ofst = (uint64_t)(*offset) + sizeof(uint64_t); + memcpy(buffer + *offset, &ofst, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + return buffer; } -BULKI * -BULKI_serde_deserialize(void *buffer) +void * +BULKI_serialize(BULKI *data) { + void * buffer = calloc(1, get_BULKI_size(data)); size_t offset = 0; - // read the meta header - size_t headerSize; - size_t dataSize; - memcpy(&headerSize, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); - memcpy(&dataSize, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); + BULKI_serialize_to_buffer(data, buffer, &offset); + // printf("offset: %zu\n", offset); + return buffer; +} - printf("headerSize: %zu\n", headerSize); - printf("dataSize: %zu\n", dataSize); +void +BULKI_Entity_serialize_to_file(BULKI_Entity *entity, FILE *fp) +{ + void *buffer = BULKI_Entity_serialize(entity); + fwrite(buffer, get_BULKI_Entity_size(entity), 1, fp); + free(buffer); + fclose(fp); +} - // read the header - size_t numKeys; - memcpy(&numKeys, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); +void +BULKI_serialize_to_file(BULKI *bulki, FILE *fp) +{ + void *buffer = BULKI_serialize(bulki); + fwrite(buffer, get_BULKI_size(bulki), 1, fp); + free(buffer); + fclose(fp); +} - printf("numKeys: %zu\n", numKeys); +/********************** Deserialize ************************** */ - BULKI_Header *header = 
malloc(sizeof(BULKI_Header)); - header->keys = malloc(sizeof(BULKI_Key) * numKeys); - header->numKeys = numKeys; - header->totalSize = headerSize; +BULKI_Entity * +BULKI_Entity_deserialize_from_buffer(void *buffer, size_t *offset) +{ + // printf("offset: %zu\n", *offset); + BULKI_Entity *entity = malloc(sizeof(BULKI_Entity)); + // deserialize the size + uint64_t size; + memcpy(&size, buffer + *offset, sizeof(uint64_t)); + entity->size = (size_t)size; + *offset += sizeof(uint64_t); + + // deserialize the class + int8_t pdc_class; + memcpy(&pdc_class, buffer + *offset, sizeof(int8_t)); + *offset += sizeof(int8_t); + entity->pdc_class = (pdc_c_var_class_t)pdc_class; + + // deserialize the type + int8_t pdc_type; + memcpy(&pdc_type, buffer + *offset, sizeof(int8_t)); + *offset += sizeof(int8_t); + entity->pdc_type = (pdc_c_var_type_t)pdc_type; + + // deserialize the count + uint64_t count; + memcpy(&count, buffer + *offset, sizeof(uint64_t)); + entity->count = (size_t)count; + *offset += sizeof(uint64_t); + + // printf("PRE-DE: size: %zu, class: %d, type: %d, count: %zu, offset: %zu\n", entity->size, + // entity->pdc_class, entity->pdc_type, entity->count, *offset); + + // deserialize the data + if (entity->pdc_class == PDC_CLS_ITEM) { + if (entity->pdc_type == PDC_BULKI) { // BULKI + entity->data = BULKI_deserialize_from_buffer(buffer, offset); + } + else if (entity->pdc_type == PDC_BULKI_ENT) { + entity->data = BULKI_Entity_deserialize_from_buffer(buffer, offset); + } + else { // all base types + entity->data = malloc(entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + memcpy(entity->data, buffer + *offset, entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + *offset += (entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + } + } + else if (entity->pdc_class <= PDC_CLS_ARRAY) { + if (pdc_type == PDC_BULKI) { // BULKI + BULKI *bulki_array = malloc(sizeof(BULKI) * entity->count); + for (size_t i = 0; i < entity->count; i++) { + 
memcpy(bulki_array + i, BULKI_deserialize_from_buffer(buffer, offset), sizeof(BULKI)); + } + entity->data = bulki_array; + } + else if (pdc_type == PDC_BULKI_ENT) { // BULKI_Entity + BULKI_Entity *bulki_entity_array = malloc(sizeof(BULKI_Entity) * entity->count); + for (size_t i = 0; i < entity->count; i++) { + memcpy(bulki_entity_array + i, BULKI_Entity_deserialize_from_buffer(buffer, offset), + sizeof(BULKI_Entity)); + } + entity->data = bulki_entity_array; + } + else { // all base types + entity->data = malloc(entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + memcpy(entity->data, buffer + *offset, entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + *offset += (entity->size - sizeof(uint8_t) * 2 - sizeof(uint64_t) * 2); + } + } + else { + printf("Error: unsupported class type %d\n", entity->pdc_class); + return NULL; + } + + // printf("POST-DE: size: %zu, class: %d, type: %d, count: %zu, offset: %zu\n", entity->size, + // entity->pdc_class, entity->pdc_type, entity->count, *offset); + return entity; +} + +BULKI * +BULKI_deserialize_from_buffer(void *buffer, size_t *offset) +{ + BULKI *bulki = malloc(sizeof(BULKI)); + // deserialize the total size + uint64_t totalSize; + memcpy(&totalSize, buffer + *offset, sizeof(uint64_t)); + bulki->totalSize = totalSize; + *offset += sizeof(uint64_t); + // printf("totalSize: %zu\n", bulki->totalSize); - printf("iterating %zu keys in the header\n", numKeys); + // deserialize the number of keys + uint64_t numKeys; + memcpy(&numKeys, buffer + *offset, sizeof(uint64_t)); + bulki->numKeys = numKeys; + *offset += sizeof(uint64_t); + // deserialize the header size + uint64_t headerSize; + memcpy(&headerSize, buffer + *offset, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // deserialize the data size + uint64_t dataSize; + memcpy(&dataSize, buffer + *offset, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + + // deserialize the header + BULKI_Header *header = malloc(sizeof(BULKI_Header)); + header->keys 
= malloc(sizeof(BULKI_Entity) * numKeys); + header->headerSize = headerSize; for (size_t i = 0; i < numKeys; i++) { - int8_t pdc_type; - size_t size; - memcpy(&pdc_type, buffer + offset, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(&size, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); - void *key = malloc(size); - memcpy(key, buffer + offset, size); - offset += size; - header->keys[i].key = key; - header->keys[i].pdc_type = (pdc_c_var_type_t)pdc_type; - header->keys[i].size = size; - - printf("key %zu: %s, size: %zu, type: %s\n", i, (char *)key, size, get_name_by_dtype(pdc_type)); + memcpy(&(header->keys[i]), BULKI_Entity_deserialize_from_buffer(buffer, offset), + sizeof(BULKI_Entity)); } - // read the data offset - size_t dataOffset; - memcpy(&dataOffset, buffer + offset, sizeof(size_t)); + // deserialize the data offset + uint64_t dataOffset; + memcpy(&dataOffset, buffer + *offset, sizeof(uint64_t)); // check the data offset - if (dataOffset != offset) { + if (((size_t)dataOffset) != *offset) { printf("Error: data offset does not match the expected offset.\n"); return NULL; } - offset += sizeof(size_t); + *offset += sizeof(uint64_t); - // read the data - size_t numValues; - memcpy(&numValues, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); + bulki->header = header; + + // deserialize the data BULKI_Data *data = malloc(sizeof(BULKI_Data)); - data->values = malloc(sizeof(BULKI_Value) * numValues); - data->numValues = numValues; - data->totalSize = dataSize; - for (size_t i = 0; i < numValues; i++) { - int8_t pdc_class; - int8_t pdc_type; - size_t size; - memcpy(&pdc_class, buffer + offset, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(&pdc_type, buffer + offset, sizeof(int8_t)); - offset += sizeof(int8_t); - memcpy(&size, buffer + offset, sizeof(size_t)); - offset += sizeof(size_t); - void *value = malloc(size); - memcpy(value, buffer + offset, size); - offset += size; - - // TODO: postponed deserialization of struct 
data, need to be finished here. - data->values[i].pdc_class = (pdc_c_var_class_t)pdc_class; - data->values[i].pdc_type = (pdc_c_var_type_t)pdc_type; - data->values[i].size = size; - data->values[i].data = value; - printf("value %zu: size: %zu, type: %s\n", i, size, get_name_by_dtype(pdc_type)); + data->values = malloc(sizeof(BULKI_Entity) * numKeys); + data->dataSize = dataSize; + for (size_t i = 0; i < numKeys; i++) { + memcpy(&(data->values[i]), BULKI_Entity_deserialize_from_buffer(buffer, offset), + sizeof(BULKI_Entity)); } // check the total size - memcpy(&dataOffset, buffer + offset, sizeof(size_t)); + memcpy(&dataOffset, buffer + *offset, sizeof(uint64_t)); + *offset += sizeof(uint64_t); + // printf("dataOffset: %zu, offset: %zu\n", dataOffset, *offset); + // check the data offset - if (dataOffset != offset) { + if (((size_t)dataOffset) != *offset) { printf("Error: data offset does not match the expected offset.\n"); return NULL; } - offset += sizeof(size_t); - if (offset != headerSize + sizeof(size_t) * 6 + dataSize) { - printf("Error: total size does not match the expected size.\n"); - return NULL; - } - // create the serialized data - BULKI *serializedData = malloc(sizeof(BULKI)); - serializedData->header = header; - serializedData->data = data; - serializedData->totalSize = headerSize + dataSize + sizeof(size_t) * 6; - return serializedData; + bulki->data = data; + + return bulki; +} + +BULKI_Entity * +BULKI_Entity_deserialize(void *buffer) +{ + size_t offset = 0; + return BULKI_Entity_deserialize_from_buffer(buffer, &offset); +} + +BULKI * +BULKI_deserialize(void *buffer) +{ + size_t offset = 0; + return BULKI_deserialize_from_buffer(buffer, &offset); +} + +BULKI_Entity * +BULKI_Entity_deserialize_from_file(FILE *fp) +{ + fseek(fp, 0, SEEK_END); + size_t fsize = ftell(fp); + fseek(fp, 0, SEEK_SET); /* same as rewind(f); */ + // read the file into the buffer + void *buffer = malloc(fsize + 1); + fread(buffer, fsize, 1, fp); + // printf("Read %ld bytes\n", 
fsize); + fclose(fp); + BULKI_Entity *rst = BULKI_Entity_deserialize(buffer); + free(buffer); + return rst; } + +BULKI * +BULKI_deserialize_from_file(FILE *fp) +{ + fseek(fp, 0, SEEK_END); + size_t fsize = ftell(fp); + fseek(fp, 0, SEEK_SET); /* same as rewind(f); */ + // read the file into the buffer + void *buffer = malloc(fsize + 1); + fread(buffer, fsize, 1, fp); + // printf("Read %ld bytes\n", fsize); + fclose(fp); + BULKI *rst = BULKI_deserialize(buffer); + free(buffer); + return rst; +} \ No newline at end of file diff --git a/src/commons/serde/bulki_serde_test.c b/src/commons/serde/bulki_serde_test.c index 59ebbae9d..18e932483 100644 --- a/src/commons/serde/bulki_serde_test.c +++ b/src/commons/serde/bulki_serde_test.c @@ -1,84 +1,289 @@ #include "bulki_serde.h" int -test_serde_framework() +test_base_type() { // Initialize a serialized data structure - BULKI *data = BULKI_serde_init(5); + BULKI *bulki = BULKI_init(2); // Create and append key-value pairs for different data types - char * intKey_str = "int"; - int intVal = 42; - BULKI_Key * intKey = BULKI_KEY(intKey_str, PDC_STRING, sizeof(intKey_str)); - BULKI_Value *intValue = BULKI_VALUE(&intVal, PDC_INT, PDC_CLS_SCALAR, sizeof(int)); - BULKI_serde_append_key_value(data, intKey, intValue); - - char * doubleKey_str = "double"; - double doubleVal = 3.14159; - BULKI_Key * doubleKey = BULKI_KEY(doubleKey_str, PDC_STRING, sizeof(doubleKey_str)); - BULKI_Value *doubleValue = BULKI_VALUE(&doubleVal, PDC_DOUBLE, PDC_CLS_SCALAR, sizeof(double)); - BULKI_serde_append_key_value(data, doubleKey, doubleValue); - - char * strKey_str = "string"; - char * strVal = "Hello, World!"; - BULKI_Key * strKey = BULKI_KEY(strKey_str, PDC_STRING, (strlen(strKey_str) + 1) * sizeof(char)); - BULKI_Value *strValue = - BULKI_VALUE(strVal, PDC_STRING, PDC_CLS_SCALAR, (strlen(strVal) + 1) * sizeof(char)); - BULKI_serde_append_key_value(data, strKey, strValue); - - char * arrayKey_str = "array"; - int intArray[3] = {1, 2, 3}; - BULKI_Key 
* arrayKey = BULKI_KEY(arrayKey_str, PDC_STRING, sizeof(arrayKey_str)); - BULKI_Value *arrayValue = BULKI_VALUE(intArray, PDC_INT, PDC_CLS_ARRAY, 3); - BULKI_serde_append_key_value(data, arrayKey, arrayValue); - - typedef struct { - int x; - int y; - } Point; - - Point pointVal = {10, 20}; - - // prepare the data of a struct - BULKI * point_data = BULKI_serde_init(2); - BULKI_Key * x_name = BULKI_KEY("x", PDC_STRING, sizeof(char *)); - BULKI_Value *x_value = BULKI_VALUE(&pointVal.x, PDC_INT, PDC_CLS_SCALAR, sizeof(int)); - BULKI_Key * y_name = BULKI_KEY("y", PDC_STRING, sizeof(char *)); - BULKI_Value *y_value = BULKI_VALUE(&pointVal.y, PDC_INT, PDC_CLS_SCALAR, sizeof(int)); - - BULKI_serde_append_key_value(point_data, x_name, x_value); - BULKI_serde_append_key_value(point_data, y_name, y_value); - - // append the struct data as a key value pair, along with a key. - char * pointKey = "point"; - BULKI_Key * structKey = BULKI_KEY(pointKey, PDC_STRING, sizeof(pointKey)); - BULKI_Value *structValue = BULKI_VALUE(point_data, PDC_VOID_PTR, PDC_CLS_STRUCT, sizeof(Point)); - BULKI_serde_append_key_value(data, structKey, structValue); + char * intKey_str = "int"; + int intVal = 42; + BULKI_Entity *intKey = BULKI_ENTITY(intKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *intValue = BULKI_ENTITY(&intVal, 1, PDC_INT, PDC_CLS_ITEM); + BULKI_put(bulki, intKey, intValue); + + int *intArrVal = (int *)malloc(3 * sizeof(int)); + intArrVal[0] = 9; // x + intArrVal[1] = 8; // y + intArrVal[2] = 7; // z + BULKI_Entity *intArrKey = BULKI_ENTITY(intArrVal, 3, PDC_INT, PDC_CLS_ARRAY); + BULKI_put(bulki, intArrKey, intValue); + + char * doubleKey_str = "double"; + double doubleVal = 3.14159; + BULKI_Entity *doubleKey = BULKI_ENTITY(doubleKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *doubleValue = BULKI_ENTITY(&doubleVal, 1, PDC_DOUBLE, PDC_CLS_ITEM); + BULKI_put(bulki, doubleKey, doubleValue); + + char * strKey_str = "string"; + char * strVal = "Hello, World!"; + BULKI_Entity 
*strKey = BULKI_ENTITY(strKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *strValue = BULKI_ENTITY(strVal, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_put(bulki, strKey, strValue); // Serialize the data - void *buffer = BULKI_serde_serialize(data); + void *buffer = BULKI_serialize(bulki); + + // printf("Serialized data:\n"); + // BULKI_print(bulki); + + // Do some I/O if you like + FILE *fp = fopen("test_bulki.bin", "wb"); + fwrite(buffer, 1, bulki->totalSize, fp); + fclose(fp); - printf("Serialized data:\n"); - BULKI_serde_print(data); + // read the file and deserialize + fp = fopen("test_bulki.bin", "rb"); + fseek(fp, 0, SEEK_END); + long fsize = ftell(fp); + fseek(fp, 0, SEEK_SET); /* same as rewind(f); */ + // read the file into the buffer + void *buffer2 = malloc(fsize + 1); + fread(buffer2, fsize, 1, fp); + // printf("Read %ld bytes\n", fsize); + fclose(fp); // Deserialize the buffer - BULKI *deserializedData = BULKI_serde_deserialize(buffer); + BULKI *deserializedBulki = BULKI_deserialize(buffer2); - printf("Deserialized data:\n"); + // printf("Deserialized data:\n"); + // BULKI_print(deserializedBulki); - // Print the deserialized data - BULKI_serde_print(deserializedData); + int equal = BULKI_equal(bulki, deserializedBulki); + printf("bulki == deserializedBulki: %d\n", equal); // Free the memory - BULKI_serde_free(data); - BULKI_serde_free(deserializedData); + BULKI_free(deserializedBulki, 1); + // printf("Freed deserializedBulki\n"); + BULKI_free(bulki, 1); + // printf("Freed bulki\n"); free(buffer); - return 0; + return equal; +} + +int +test_put_replace() +{ + // Initialize a serialized data structure + BULKI *bulki = BULKI_init(2); + + // Create and append key-value pairs for different data types + BULKI_put(bulki, BULKI_ENTITY("key1", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY("value1", 1, PDC_STRING, PDC_CLS_ITEM)); + + BULKI_put(bulki, BULKI_ENTITY("key2", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY("value2", 1, PDC_STRING, PDC_CLS_ITEM)); + + 
uint64_t u64value = 7987; + BULKI_put(bulki, BULKI_ENTITY("key1", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY(&u64value, 1, PDC_UINT64, PDC_CLS_ITEM)); + + BULKI_Entity *dataEnt = BULKI_get(bulki, BULKI_ENTITY("key1", 1, PDC_STRING, PDC_CLS_ITEM)); + int equal = BULKI_Entity_equal(dataEnt, BULKI_ENTITY(&u64value, 1, PDC_UINT64, PDC_CLS_ITEM)); + printf("first value is desired after replacing the original value: %d\n", equal); + dataEnt = BULKI_get(bulki, BULKI_ENTITY("key2", 1, PDC_STRING, PDC_CLS_ITEM)); + equal = BULKI_Entity_equal(dataEnt, BULKI_ENTITY("value2", 1, PDC_STRING, PDC_CLS_ITEM)); + printf("second value not changed after replace put: %d\n", equal); + return equal; +} + +int +test_base_array_entitiy() +{ + // Initialize a serialized data structure + BULKI *bulki = BULKI_init(2); + + // Create and append key-value pairs for different data types + char * intKey_str = "int"; + int intVal = 42; + uint64_t intObjID = 12416574651687; + BULKI_Entity *intKey = BULKI_ENTITY(intKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *intArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(intArr, BULKI_ENTITY(&intVal, 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(intArr, BULKI_ENTITY(&intObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, intKey, intArr); + + char * doubleKey_str = "double"; + double doubleVal = 3.14159; + uint64_t doubleObjID = 564987951987494; + BULKI_Entity *doubleKey = BULKI_ENTITY(doubleKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *doubleArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(doubleArr, BULKI_ENTITY(&doubleVal, 1, PDC_DOUBLE, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(doubleArr, BULKI_ENTITY(&doubleObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, doubleKey, doubleArr); + + char *strKey_str = "string"; + char *strVal = "Hello, World!"; + + BULKI_Entity *strKey = BULKI_ENTITY(strKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *strArr = 
empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(strArr, BULKI_ENTITY(strVal, 1, PDC_STRING, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(strArr, BULKI_ENTITY(&intObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, strKey, strArr); + + char * mixedKey_str = "mixed"; + BULKI_Entity *mixedKey = BULKI_ENTITY(mixedKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *mixedArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(mixedArr, BULKI_ENTITY(&intVal, 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(mixedArr, BULKI_ENTITY(&doubleVal, 1, PDC_DOUBLE, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(mixedArr, BULKI_ENTITY(strVal, 1, PDC_STRING, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(mixedArr, BULKI_ENTITY(&intObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, mixedKey, mixedArr); + + // Initialize a serialized data structure + BULKI *bulki2 = BULKI_init(2); + + // Create and append key-value pairs for different data types + char * intKey_str2 = "int"; + int intVal2 = 42; + BULKI_Entity *intKey2 = BULKI_ENTITY(intKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *intArr2 = empty_BULKI_Array_Entity(); + BULKI_ENTITY_append_BULKI(intArr2, bulki); + BULKI_ENTITY_append_BULKI(intArr2, bulki); + BULKI_put(bulki2, intKey2, intArr2); + + // Serialize the data + void *buffer = BULKI_serialize(bulki2); + + // printf("Serialized data:\n"); + // BULKI_print(bulki2); + + // Deserialize the buffer + BULKI *deserializedBulki = BULKI_deserialize(buffer); + + // printf("Deserialized data:\n"); + // BULKI_print(deserializedBulki); + + int equal = BULKI_equal(bulki2, deserializedBulki); + printf("bulki2 == deserializedBulki: %d\n", equal); + + // Free the memory + BULKI_free(deserializedBulki, 1); + // printf("Freed deserializedBulki\n"); + BULKI_free(bulki, 1); + // printf("Freed bulki\n"); + free(buffer); + + return equal; +} + +int +test_embedded_entitiy() +{ + // Initialize a serialized data structure + 
BULKI *bulki = BULKI_init(2); + + // Create and append key-value pairs for different data types + char * intKey_str = "int"; + int intVal = 42; + uint64_t intObjID = 12416574651687; + BULKI_Entity *intKey = BULKI_ENTITY(intKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *intArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(intArr, BULKI_ENTITY(&intVal, 1, PDC_INT, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(intArr, BULKI_ENTITY(&intObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, intKey, intArr); + + char * doubleKey_str = "double"; + double doubleVal = 3.14159; + uint64_t doubleObjID = 564987951987494; + BULKI_Entity *doubleKey = BULKI_ENTITY(doubleKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *doubleArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(doubleArr, BULKI_ENTITY(&doubleVal, 1, PDC_DOUBLE, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(doubleArr, BULKI_ENTITY(&doubleObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, doubleKey, doubleArr); + + char *strKey_str = "string"; + char *strVal = "Hello, World!"; + + BULKI_Entity *strKey = BULKI_ENTITY(strKey_str, 1, PDC_STRING, PDC_CLS_ITEM); + BULKI_Entity *strArr = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(strArr, BULKI_ENTITY(strVal, 1, PDC_STRING, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(strArr, BULKI_ENTITY(&intObjID, 1, PDC_UINT64, PDC_CLS_ITEM)); + BULKI_put(bulki, strKey, strArr); + + // Serialize the data + void *buffer = BULKI_serialize(bulki); + + // printf("Serialized data:\n"); + // BULKI_print(bulki); + + // Deserialize the buffer + BULKI *deserializedBulki = BULKI_deserialize(buffer); + + // printf("Deserialized data:\n"); + // BULKI_print(deserializedBulki); + + int equal = BULKI_equal(bulki, deserializedBulki); + printf("bulki == deserializedBulki: %d\n", equal); + + // Free the memory + BULKI_free(deserializedBulki, 1); + // printf("Freed deserializedBulki\n"); + BULKI_free(bulki, 1); + // 
printf("Freed bulki\n"); + free(buffer); + + return equal; +} + +int +test_bulki_in_entitiy() +{ + + // Initialize a serialized data structure + BULKI *bulki = BULKI_init(1); + // BULKI in BULKI_Entity + BULKI_Entity *nestEntity = BULKI_ENTITY(bulki, 1, PDC_BULKI, PDC_CLS_ITEM); + + void * buffer = BULKI_Entity_serialize(nestEntity); + BULKI_Entity *des_nestEntity = BULKI_Entity_deserialize(buffer); + + int equal = BULKI_Entity_equal(nestEntity, des_nestEntity); + + printf("EMPTY BULKI in BULKI Entity = %d \n", equal); + + BULKI_put(bulki, BULKI_ENTITY("key", 1, PDC_STRING, PDC_CLS_ITEM), + BULKI_ENTITY("value", 1, PDC_STRING, PDC_CLS_ITEM)); + + buffer = BULKI_Entity_serialize(nestEntity); + des_nestEntity = BULKI_Entity_deserialize(buffer); + + equal = BULKI_Entity_equal(nestEntity, des_nestEntity); + printf("non-empty base BULKI in BULKI Entity = %d \n", equal); + + BULKI_Entity *secondValue = empty_Bent_Array_Entity(); + BULKI_ENTITY_append_BULKI_Entity(secondValue, BULKI_ENTITY("secondValue1", 1, PDC_STRING, PDC_CLS_ITEM)); + BULKI_ENTITY_append_BULKI_Entity(secondValue, BULKI_ENTITY("secondValue2", 1, PDC_STRING, PDC_CLS_ITEM)); + + BULKI_put(bulki, BULKI_ENTITY("key2", 1, PDC_STRING, PDC_CLS_ITEM), secondValue); + + buffer = BULKI_Entity_serialize(nestEntity); + des_nestEntity = BULKI_Entity_deserialize(buffer); + + equal = BULKI_Entity_equal(nestEntity, des_nestEntity); + printf("non-empty compound BULKI in BULKI Entity with array = %d \n", equal); + + return equal; } int main(int argc, char *argv[]) { - return test_serde_framework(); + printf("test_base_type RST = %d\n", test_base_type()); + printf("test_put_replace RST = %d\n", test_put_replace()); + printf("test_base_array_entitiy RST = %d\n", test_base_array_entitiy()); + printf("test_embedded_entitiy RST = %d\n", test_embedded_entitiy()); + printf("test_nested_entitiy RST = %d\n", test_bulki_in_entitiy()); } \ No newline at end of file diff --git a/src/commons/serde/include/bulki.h 
b/src/commons/serde/include/bulki.h index 168af576e..1133e279c 100644 --- a/src/commons/serde/include/bulki.h +++ b/src/commons/serde/include/bulki.h @@ -8,94 +8,326 @@ #include #include "pdc_generic.h" -typedef struct { - pdc_c_var_type_t pdc_type; /**< Data type of the key */ - uint64_t size; /**< Size of the key */ - void * key; /**< Pointer to the key data */ -} BULKI_Key; - +/** + * @brief BULKI_Entity structure + * + * pdc_class can be either SCALAR or ARRAY, or STRUCT. + * For SCALAR, the data is a single value of the type specified by pdc_type. + * For ARRAY, the data is an array of values of the type specified by pdc_type. + * For STRUCT, the data is a pointer to a BULKI structure, which is a collection of key-value pairs. + */ typedef struct { pdc_c_var_class_t pdc_class; /**< Class of the value */ pdc_c_var_type_t pdc_type; /**< Data type of the value */ - uint64_t size; // size of the data. If a string, it is strlen(data) + 1; - // if an array, it is the number of elements; - // if a struct, it is the totalSize of the data chunk of the struct, etc. - void *data; /**< Pointer to the value data */ -} BULKI_Value; + uint64_t count; /**< Number of elements in the array */ + uint64_t size; // size in byte of the data. 
+ void * data; /**< Pointer to the value data */ +} BULKI_Entity; typedef struct { - BULKI_Key *keys; /**< Array of keys */ - size_t totalSize; /**< Total size of the header */ + BULKI_Entity *keys; /**< Array of keys */ + uint64_t headerSize; /**< Total bytes of the header region */ } BULKI_Header; typedef struct { - BULKI_Value *values; /**< Array of values */ - uint64_t totalSize; /**< Total size of the data */ + BULKI_Entity *values; /**< Array of values */ + uint64_t dataSize; /**< Total bytes of the data region */ } BULKI_Data; typedef struct { BULKI_Header *header; /**< Pointer to the header */ BULKI_Data * data; /**< Pointer to the data */ uint64_t totalSize; /**< Total size of the serialized data */ - uint64_t numKeys; /**< Number of keys */ + uint64_t numKeys; /**< Actual Number of keys in the header*/ + uint64_t capacity; /**< The predefined number of keys in Bulki. If numKeys >= capacity, array expansion is + needed */ } BULKI; +typedef struct { + BULKI_Entity *entity_array; // Points to the array being iterated + uint64_t current_idx; // Current index in the array + uint64_t total_size; // Total number of elements in the array + void * bent_filter; // Optional filter to apply during iteration, it can be NULL/an instance of + // BULKI/an instance of BULKI_Entity + pdc_c_var_type_t filter_type; // The type of the filter +} BULKI_Entity_Iterator; + +typedef struct { + BULKI_Entity key; + BULKI_Entity value; +} BULKI_KV_Pair; + +typedef struct { + BULKI * bulki; + uint64_t current_idx; + uint64_t total_size; +} BULKI_KV_Pair_Iterator; + +/** + * @brief Append a BULKI structure to the BULKI_Entity structure that is returned by an + * `empty_BULKI_Array_Entity` call. You need to make sure the content of the src structure is the final version + * before calling this function. Any change to the content of src after calling this function will not be + * reflected in the serialized data structure. 
If you need to change the content of src after calling this + * function, you need to iterate through all the BULKI structures in the dest structure and update them. We do + * not recommend this because we do not assume the BULKI elements in the internal array would be unique to + * each other. + * @param dest Pointer to the destination BULKI_Entity structure + * @param src Pointer to the BULKI structure to append + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_ENTITY_append_BULKI(BULKI_Entity *dest, BULKI *src); + +/** + * @brief Get the BULKI structure from the BULKI_Entity array by the given index. + * @param bulki_entity Pointer to the BULKI_Entity structure + * @param idx Index of the BULKI structure to get + * + * @return Pointer to the BULKI structure + */ +BULKI *BULKI_ENTITY_get_BULKI(BULKI_Entity *bulki_entity, size_t idx); + +/** + * @brief Append a BULKI_Entity structure to the BULKI_Entity structure + * You need to make sure the content of the src structure is the final version before calling this function. + * Any change to the content of src after calling this function will not be reflected in the serialized data + * structure. + * If you need to change the content of src after calling this + * function, you need to iterate through all the BULKI_Entity structures in the dest structure and update them. + * We do not recommend this because we do not assume the BULKI_Entity elements in the internal array would be + * unique to each other. + * @param dest Pointer to the destination BULKI_Entity structure + * @param src Pointer to the BULKI_Entity structure to append + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_ENTITY_append_BULKI_Entity(BULKI_Entity *dest, BULKI_Entity *src); + +/** + * @brief Get the BULKI_Entity structure from the BULKI_Entity array by the given index. 
+ * @param bulk_entity Pointer to the BULKI_Entity structure + * @param idx Index of the BULKI_Entity structure to get + * + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_ENTITY_get_BULKI_Entity(BULKI_Entity *bulk_entity, size_t idx); + +/** + * @brief create an empty BULKI_Entity structure, usually used as a wrapper for BULKI_Entity structures. Since + * this will be used as a wrapper, we consider this to be an array of BULKI_Entity. + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *empty_Bent_Array_Entity(); + +/** + * @brief create an empty BULKI_Entity structure, usually used as a wrapper for an array of BULKI structures + */ +BULKI_Entity *empty_BULKI_Array_Entity(); + +/** + * @brief Create a BULKI_Entity structure with data of base type. + * @param data Pointer to the Entity data + * @param pdc_type Data type of the data + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *BULKI_singleton_ENTITY(void *data, pdc_c_var_type_t pdc_type); + +/** + * @brief Create a BULKI_Entity structure with data of base type, and the count of elements in the array + * @param data Pointer to the Entity data + * @param count Number of elements in the array + * @param pdc_type Data type of each element in the array + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *BULKI_array_ENTITY(void *data, uint64_t count, pdc_c_var_type_t pdc_type); + +/** + * @brief Create a BULKI_Entity structure with data of base type. + * It is okay to change the content of `data` after this call if `data` is a BULKI or BULKI_Entity structure. + * But make sure you do not change the content of `data` or the content of the result BULKI_Entity structure + * after any `BULKI_put` call or `BULKI_ENTITY_append_*` call. 
+ * @param data Pointer to the Entity data + * @param count Number of elements in the array + * @param pdc_type Data type of each element in the array + * @param pdc_class Class of the each element in the array + * + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *BULKI_ENTITY(void *data, uint64_t count, pdc_c_var_type_t pdc_type, + pdc_c_var_class_t pdc_class); + +/** + * @brief A wrapper of BULKI_ENTITY((void *)single_data, 1, pdc_type, PDC_CLS_ITEM) + * @param data Pointer to the data + * @param pdc_type Data type of the single element + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *BULKI_singleton_ENTITY(void *data, pdc_c_var_type_t pdc_type); + +/** + * @brief A wrapper of BULKI_ENTITY((void*)array_data, array_len, pdc_type, PDC_CLS_ARRAY) + * @param data Pointer to the data + * @param count Number of elements in the array + * @param pdc_type Data type of each element in the array + * @return Pointer to the created BULKI_Entity structure + */ +BULKI_Entity *BULKI_array_ENTITY(void *data, uint64_t count, pdc_c_var_type_t pdc_type); + /** - * @brief Initialize a serialized data structure + * @brief Initialize a Bulki data structure * * @param initial_field_count Number of initial fields to allocate space for * * @return Pointer to the initialized BULKI structure */ -BULKI *BULKI_serde_init(int initial_field_count); +BULKI *BULKI_init(int initial_field_count); /** - * @brief Append a key-value pair to the serialized data structure - * + * @brief Compare two BULKI_Entity structures for equality + * @param be1 Pointer to the first BULKI_Entity structure + * @param be2 Pointer to the second BULKI_Entity structure + * @return 1 if the two structures are equal, 0 otherwise + */ +int BULKI_Entity_equal(BULKI_Entity *be1, BULKI_Entity *be2); + +/** + * @brief Compare two BULKI structures for equality + * @param bulki1 Pointer to the first BULKI structure + * @param bulki2 Pointer to the second BULKI structure + * 
@return 1 if the two structures are equal, 0 otherwise + */ +int BULKI_equal(BULKI *bulki1, BULKI *bulki2); + +/** + * @brief Put a key-value pair to the serialized data structure. If the key already exists, update the value. + * You need to make sure the content in both key and value are the final version before calling this function. + * Any change to the key or value after calling this function will not be reflected in the serialized data + * structure. + * If you really have to change the content of key or value after calling this function, you need to call + * BULKI_put again to update the serialized data structure. * @param data Pointer to the BULKI structure - * @param key Pointer to the BULKI_Key structure representing the key - * @param value Pointer to the BULKI_Value structure representing the value + * @param key Pointer to the BULKI_Entity structure representing the key + * @param value Pointer to the BULKI_Entity structure representing the value */ -void BULKI_serde_append_key_value(BULKI *data, BULKI_Key *key, BULKI_Value *value); +void BULKI_put(BULKI *bulki, BULKI_Entity *key, BULKI_Entity *value); /** - * @brief Free the memory allocated for the serialized data structure + * @brief Delete a key-value pair from the serialized data structure * - * @param data Pointer to the BULKI structure to be freed + * @param data Pointer to the BULKI structure + * @param key Pointer to the BULKI_Entity structure representing the key + * @return the deleted BULKI_Entity value. If the key is not found, return NULL. 
*/ -void BULKI_serde_free(BULKI *data); +BULKI_Entity *BULKI_delete(BULKI *bulki, BULKI_Entity *key); /** - * @brief Print the contents of the serialized data structure - * - * @param data Pointer to the BULKI structure to be printed + * @brief Initialize the iterator with an optional BULKI_Entity as a filter + * @param entity_array Pointer to the BULKI_Entity array + * @param filter This can be NULL/an instance of BULKI/an instance of BULKI_Entity + * @param filter_type The type of the filter, must be PDC_BULKI, PDC_BULKI_ENTITY + * @return Pointer to the Bent_Iterator structure + */ +BULKI_Entity_Iterator *Bent_iterator_init(BULKI_Entity *array, void *filter, pdc_c_var_type_t filter_type); + +/** + * @brief Test if the iterator has more BULKI structures to return + * @param iter Pointer to the Bent_Iterator structure + * @return 1 if there are more BULKI structures to return, 0 otherwise + */ +int Bent_iterator_has_next_BULKI(BULKI_Entity_Iterator *it); + +/** + * @brief Test if the iterator has more BULKI_Entity structures to return + * @param iter Pointer to the Bent_Iterator structure + * @return 1 if there are more BULKI_Entity structures to return, 0 otherwise + */ +int Bent_iterator_has_next_Bent(BULKI_Entity_Iterator *it); + +/** + * @brief Get the next BULKI from the iterator + * @param iter Pointer to the Bent_Iterator structure + * @return Pointer to the next BULKI structure + */ +BULKI *Bent_iterator_next_BULKI(BULKI_Entity_Iterator *it); + +/** + * @brief Get the next BULKI_Entity from the iterator + * @param iter Pointer to the Bent_Iterator structure + * @return Pointer to the next BULKI_Entity structure + */ +BULKI_Entity *Bent_iterator_next_Bent(BULKI_Entity_Iterator *it); + +/** + * @brief Initialize the iterator with a BULKI structure + * @param bulki Pointer to the BULKI structure + * @return Pointer to the BULKI_KV_Pair_Iterator structure + */ +BULKI_KV_Pair_Iterator *BULKI_KV_Pair_iterator_init(BULKI *bulki); + +/** + * @brief Test if the 
iterator has more BULKI_KV_Pair structures to return + * @param iter Pointer to the BULKI_KV_Pair_Iterator structure + * @return 1 if there are more BULKI_KV_Pair structures to return, 0 otherwise + */ +int BULKI_KV_Pair_iterator_has_next(BULKI_KV_Pair_Iterator *it); + +/** + * @brief Get the next BULKI_KV_Pair from the iterator + * @param iter Pointer to the BULKI_KV_Pair_Iterator structure + * @return Pointer to the next BULKI_KV_Pair structure + */ +BULKI_KV_Pair *BULKI_KV_Pair_iterator_next(BULKI_KV_Pair_Iterator *it); + +/** + * @brief Get the value of a key from the serialized data structure + * @param data Pointer to the BULKI structure + * @param key Pointer to the BULKI_Entity structure representing the key + * @return Pointer to the BULKI_Entity structure representing the value + */ +BULKI_Entity *BULKI_get(BULKI *bulki, BULKI_Entity *key); + +/** + * @brief Print the contents of BULKI or BULKI_Entity structure. + * @param data Pointer to the BULKI structure + */ +void BULKI_print(BULKI *bulki); + +/** + * @brief Print the contents of BULKI or BULKI_Entity structure. + * @param data Pointer to the BULKI structure */ -void BULKI_serde_print(BULKI *data); +void BULKI_Entity_print(BULKI_Entity *bulk_entity); /** - * @brief Create a BULKI_Key structure + * @brief get the total size of BULKI or BULKI_Entity structure. * - * @param key Pointer to the key data - * @param pdc_type Data type of the key. For BULKI_Key, we only support PDC_CLS_SCALAR class. - * @param size Size of the key data + * @param data Pointer to the BULKI structure + * @return size_t + */ +size_t get_BULKI_size(BULKI *bulki); + +/** + * @brief get the total size of BULKI_Entity structure. 
* - * @return Pointer to the created BULKI_Key structure + * @param data Pointer to the BULKI_Entity structure + * @return size_t */ -BULKI_Key *BULKI_KEY(void *key, pdc_c_var_type_t pdc_type, uint64_t size); +size_t get_BULKI_Entity_size(BULKI_Entity *bulk_entity); /** - * @brief Create a BULKI_Value structure + * @brief free the memory allocated for the BULKI_Entity structure * - * @param data Pointer to the value data - * @param pdc_type Data type of the value - * @param pdc_class Class of the value - * @param size Size of the value data. - * For scalar value, it is the result of sizeof(type) function; - * for array, it is the number of elements; - * for struct, it is the totalSize of the data chunk of the struct, etc. + * @param bulk_entity Pointer to the BULKI_Entity structure to be freed + * @param free_struct If 1, free the BULKI_Entity structure itself + * @return void + */ +void BULKI_Entity_free(BULKI_Entity *bulk_entity, int free_struct); + +/** + * @brief free the memory allocated for the BULKI structure * - * @return Pointer to the created BULKI_Value structure + * @param bulki Pointer to the BULKI structure to be freed + * @param free_struct If 1, free the BULKI structure itself + * @return void */ -BULKI_Value *BULKI_VALUE(void *data, pdc_c_var_type_t pdc_type, pdc_c_var_class_t pdc_class, uint64_t size); +void BULKI_free(BULKI *bulki, int free_struct); #endif /* BULKI_H */ \ No newline at end of file diff --git a/src/commons/serde/include/bulki_serde.h b/src/commons/serde/include/bulki_serde.h index bd139881a..9b8f5586e 100644 --- a/src/commons/serde/include/bulki_serde.h +++ b/src/commons/serde/include/bulki_serde.h @@ -13,30 +13,113 @@ #define MAX_BUFFER_SIZE 1000 /** - * @brief get the total size of BULKI structure instance + * @brief Serialize a BULKI_Entity structure to a buffer * - * @param data Pointer to the BULKI structure instance + * @param entity Pointer to the BULKI_Entity structure + * @param buffer Pointer to the buffer + * @param 
offset Pointer to the offset * - * @return total size of the BULKI structure instance + * @return Pointer to the buffer */ -uint64_t get_total_size_for_serialized_data(BULKI *data); +void *BULKI_Entity_serialize_to_buffer(BULKI_Entity *entity, void *buffer, size_t *offset); /** - * @brief Serialize the data in the serialized data structure and return the buffer + * @brief Serialize a BULKI structure to a buffer + * + * @param bulki Pointer to the BULKI structure + * @param buffer Pointer to the buffer + * @param offset Pointer to the offset + * + * @return Pointer to the buffer + */ +void *BULKI_serialize_to_buffer(BULKI *bulki, void *buffer, size_t *offset); + +/** + * @brief Serialize a BULKI_Entity structure to a buffer + * + * @param entity Pointer to the BULKI_Entity structure + * + * @return Pointer to the buffer + */ +void *BULKI_Entity_serialize(BULKI_Entity *entity); + +/** + * @brief Serialize a BULKI structure to a buffer * * @param data Pointer to the BULKI structure * - * @return Pointer to the buffer containing the serialized data + * @return Pointer to the buffer + */ +void *BULKI_serialize(BULKI *data); + +/** + * @brief Serialize a BULKI_Entity structure to a file and then close the file + * @param bulki_entity The BULKI_Entity structure + * @param fp The file pointer + * @return + */ +void BULKI_Entity_serialize_to_file(BULKI_Entity *bulki_entity, FILE *fp); + +/** + * @brief Serialize a BULKI structure to a file and then close the file + * @param bulki The BULKI structure + * @param fp The file pointer + * @return + */ +void BULKI_serialize_to_file(BULKI *bulki, FILE *fp); + +/********************** Deserialize ************************** */ + +/** + * @brief Deserialize a BULKI structure from a buffer + * + * @param buffer Pointer to the buffer + * @param offset Pointer to the offset + * + * @return Pointer to the BULKI structure + */ +BULKI *BULKI_deserialize_from_buffer(void *buffer, size_t *offset); + +/** + * @brief Deserialize 
a BULKI structure from a buffer + * + * @param buffer Pointer to the buffer + * + * @return Pointer to the BULKI structure */ -void *BULKI_serde_serialize(BULKI *data); +BULKI *BULKI_deserialize(void *buffer); /** - * @brief Deserialize the buffer and return the deserialized data structure + * @brief Deserialize a BULKI_Entity structure from a buffer * - * @param buffer Pointer to the buffer containing the serialized data + * @param buffer Pointer to the buffer + * @param offset Pointer to the offset * - * @return Pointer to the deserialized BULKI structure + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_Entity_deserialize_from_buffer(void *buffer, size_t *offset); + +/** + * @brief Deserialize a BULKI_Entity structure from a buffer + * + * @param buffer Pointer to the buffer + * + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_Entity_deserialize(void *buffer); + +/** + * @brief Deserialize a BULKI_Entity structure from a file + * @param fp The file pointer + * @return Pointer to the BULKI_Entity structure + */ +BULKI_Entity *BULKI_Entity_deserialize_from_file(FILE *fp); + +/** + * @brief Deserialize a BULKI structure from a file + * @param fp The file pointer + * @return Pointer to the BULKI structure */ -BULKI *BULKI_serde_deserialize(void *buffer); +BULKI *BULKI_deserialize_from_file(FILE *fp); #endif /* BULKI_SERDE_H */ \ No newline at end of file diff --git a/src/commons/utils/include/pdc_malloc.h b/src/commons/utils/include/pdc_malloc.h index e967c9032..14440040a 100644 --- a/src/commons/utils/include/pdc_malloc.h +++ b/src/commons/utils/include/pdc_malloc.h @@ -71,6 +71,12 @@ void *PDC_realloc_addsize(void *ptr, size_t size, size_t *mem_usage_ptr); */ void *PDC_free(void *mem); +/** + * free allocated memory and subtract size from specified memory size pointer + * \param mem [IN] Starting address of memory + */ +void PDC_free_void(void *mem); + /** * Get total memory usage from the global variable */ diff --git 
a/src/commons/utils/include/query_utils.h b/src/commons/utils/include/query_utils.h index 513fa0916..1d8906cf1 100644 --- a/src/commons/utils/include/query_utils.h +++ b/src/commons/utils/include/query_utils.h @@ -7,6 +7,7 @@ #define TAG_DELIMITER "," #include "string_utils.h" #include "pdc_public.h" +#include "comparators.h" typedef struct query_gen_input { pdc_kvtag_t *base_tag; @@ -24,6 +25,19 @@ typedef struct query_gen_output { size_t value_query_len; } query_gen_output_t; +typedef enum { NUM_EXACT, NUM_LT, NUM_GT, NUM_BETWEEN } NUM_QUERY_TYPE; + +typedef void (*num_query_action)(void *cond_exact, void *cond_lo, void *cond_hi, int lo_inclusive, + int hi_inclusive, pdc_c_var_type_t num_type, void *input, void **out, + uint64_t *out_len); + +typedef struct { + num_query_action exact_action; + num_query_action lt_action; + num_query_action gt_action; + num_query_action between_action; +} num_query_action_collection_t; + /** * Generate query strings for key and value according to the given input. * The query strings will be stored in the output. @@ -117,4 +131,37 @@ int is_value_match_p(const char *tagslist, const char *tagname, const char *patt */ int is_value_in_range(const char *tagslist, const char *tagname, int from, int to); +/** + * determine if the value part in the query condition is a string query + */ +int is_string_query(char *value_query); + +/** + * determine if the value part in the query condition is an affix-based query + */ +int is_affix_query(char *value_query); + +/** + * determine if the value part in the query condition is a range query + */ +int is_number_query(char *value_query); + +/** + * parse and run a string query for number value + * + * The following queries are what we need to support + * 1. exact query -> key=|value| (key == value) + * 5. range query -> key=value~ (key > value) + * 6. range query -> key=~value (key < value) + * 7. range query -> key=value|~ (key >= value) + * 8. range query -> key=~|value (key <= value) + * 9. 
range query -> key=value1|~value2 (value1 <= key < value2) + * 10. range query -> key=value1~|value2 (value1 < key <= value2) + * 11. range query -> key=value1~value2 (value1 < key < value2) + * 12. range query -> key=value1|~|value2 (value1 <= key <= value2) + */ +int parse_and_run_number_value_query(char *num_val_query, pdc_c_var_type_t num_type, + num_query_action_collection_t *action_collection, void *cb_input, + uint64_t *cb_out_len, void **cb_out); + #endif // PDC_QUERY_UTILS_H diff --git a/src/commons/utils/include/string_utils.h b/src/commons/utils/include/string_utils.h index 865a282de..ce264d449 100644 --- a/src/commons/utils/include/string_utils.h +++ b/src/commons/utils/include/string_utils.h @@ -188,4 +188,18 @@ int split_string(const char *str, const char *delim, char ***result, int *result */ char **gen_random_strings(int count, int minlen, int maxlen, int alphabet_size); +/** + * determine if the given token is a quoted string. + * @param token + * @return 1 if it is a quoted string, 0 if not. + */ +int is_quoted_string(char *token); + +/** + * remove the quotes from a string + * @param str + * @return + */ +char *stripQuotes(const char *str); + #endif // PDC_STRING_UTILS_H diff --git a/src/commons/utils/pdc_malloc.c b/src/commons/utils/pdc_malloc.c index c7ca6aa5f..8bcd2812b 100644 --- a/src/commons/utils/pdc_malloc.c +++ b/src/commons/utils/pdc_malloc.c @@ -181,6 +181,12 @@ PDC_free(void *mem) return PDC_free_knowing_old_size(mem, _old_size); } +void +PDC_free_void(void *mem) +{ + PDC_free(mem); +} + size_t PDC_get_global_mem_usage() { diff --git a/src/commons/utils/query_utils.c b/src/commons/utils/query_utils.c index c0ea64032..058639cf6 100644 --- a/src/commons/utils/query_utils.c +++ b/src/commons/utils/query_utils.c @@ -2,47 +2,56 @@ // Created by Wei Zhang on 7/10/17. 
// #include "query_utils.h" +#include +#include int _gen_affix_for_token(char *token_str, int affix_type, size_t affix_len, char **out_str) { size_t token_len = strlen(token_str); + + if (affix_type == 0) { + *out_str = strdup(token_str); + return token_len; + } + affix_len = affix_len < token_len ? affix_len : token_len; size_t copy_len = affix_type == 0 ? token_len : affix_len; char * source = affix_type <= 1 ? token_str : &(token_str[token_len - affix_len]); - *out_str = (char *)calloc(copy_len + 3, sizeof(char)); - strncpy(*out_str, source, copy_len + 1); + char * affix_str = (char *)calloc(copy_len + 3, sizeof(char)); - if (affix_type == 0) { // exact - // nothing to do here. - } - else if (affix_type == 1) { // prefix + strncpy(affix_str, source, copy_len + 1); + + if (affix_type == 1) { // prefix // "hello" -> "hell*" or "hell" -> "hell*" - (*out_str)[affix_len] = '*'; - (*out_str)[affix_len + 1] = '\0'; + affix_str[affix_len] = '*'; + affix_str[affix_len + 1] = '\0'; } else if (affix_type == 2) { // suffix // "hello" -> '*ello' or 'hell' -> '*hell' for (int k = affix_len; k > 0; k--) { - (*out_str)[k] = (*out_str)[k - 1]; + affix_str[k] = affix_str[k - 1]; } - (*out_str)[0] = '*'; - (*out_str)[affix_len + 1] = '\0'; + affix_str[0] = '*'; + affix_str[affix_len + 1] = '\0'; } else if (affix_type == 3) { // infix // "hello" -> '*ello*' or 'hell' -> '*hell*' for (int k = affix_len; k > 0; k--) { - (*out_str)[k] = (*out_str)[k - 1]; + affix_str[k] = affix_str[k - 1]; } - (*out_str)[0] = '*'; - (*out_str)[affix_len + 1] = '*'; - (*out_str)[affix_len + 2] = '\0'; + affix_str[0] = '*'; + affix_str[affix_len + 1] = '*'; + affix_str[affix_len + 2] = '\0'; } else { - printf("Invalid affix type!\n"); + printf("Invalid affix type %d!\n", affix_type); return 0; } + + *out_str = affix_str; + return strlen(*out_str); } @@ -77,36 +86,55 @@ gen_query_key_value(query_gen_input_t *input, query_gen_output_t *output) } // process value in base_tag - if (input->base_tag->type == 
PDC_STRING) { - value_ptr_len = _gen_affix_for_token((char *)input->base_tag->value, input->value_query_type, - affix_len, &value_ptr); + if (is_PDC_STRING(input->base_tag->type)) { + char *temp_value = NULL; + value_ptr_len = _gen_affix_for_token((char *)input->base_tag->value, input->value_query_type, + affix_len, &temp_value); + value_ptr = (char *)calloc(value_ptr_len + 3, sizeof(char)); + value_ptr[0] = '"'; + strcat(value_ptr, temp_value); + value_ptr[value_ptr_len + 1] = '"'; + value_ptr[value_ptr_len + 2] = '\0'; + if (value_ptr_len == 0) { printf("Failed to generate value query!\n"); return; } } - else if (input->base_tag->type == PDC_INT) { + else { + if (is_PDC_INT(input->base_tag->type)) { + input->base_tag->type = PDC_INT64; + } + else if (is_PDC_UINT(input->base_tag->type)) { + input->base_tag->type = PDC_UINT64; + } + else if (is_PDC_FLOAT(input->base_tag->type)) { + input->base_tag->type = PDC_DOUBLE; + } + else { + printf("Invalid tag type!\n"); + return; + } + char *format_str = get_format_by_dtype(input->base_tag->type); if (input->value_query_type == 4) { - value_ptr_len = snprintf(NULL, 0, "%d", ((int *)input->base_tag->value)[0]); + value_ptr_len = snprintf(NULL, 0, format_str, ((int64_t *)input->base_tag->value)[0]); value_ptr = (char *)calloc(value_ptr_len + 1, sizeof(char)); - snprintf(value_ptr, value_ptr_len + 1, "%d", ((int *)input->base_tag->value)[0]); + snprintf(value_ptr, value_ptr_len + 1, format_str, ((int64_t *)input->base_tag->value)[0]); } else if (input->value_query_type == 5) { - size_t lo_len = snprintf(NULL, 0, "%d", input->range_lo); - size_t hi_len = snprintf(NULL, 0, "%d", input->range_hi); + size_t lo_len = snprintf(NULL, 0, format_str, input->range_lo); + size_t hi_len = snprintf(NULL, 0, format_str, input->range_hi); value_ptr_len = lo_len + hi_len + 1; value_ptr = (char *)calloc(value_ptr_len + 1, sizeof(char)); - snprintf(value_ptr, value_ptr_len + 1, "%d~%d", input->range_lo, input->range_hi); + char fmt_str[20]; + 
snprintf(fmt_str, 20, "%s~%s", format_str, format_str); + snprintf(value_ptr, value_ptr_len + 1, fmt_str, input->range_lo, input->range_hi); } else { printf("Invalid value query type for integer!\n"); return; } } - else { - printf("Invalid tag type!\n"); - return; - } output->key_query = key_ptr; output->key_query_len = key_ptr_len; @@ -344,3 +372,110 @@ is_value_in_range(const char *tagslist, const char *tagname, int from, int to) int v = atoi(value); return (v >= from && v <= to); } + +int +is_string_query(char *value_query) +{ + return is_quoted_string(value_query); +} + +int +is_affix_query(char *value_query) +{ + if (is_string_query(value_query) && contains(value_query, "*")) { + return 1; + } + return 0; +} + +int +is_number_query(char *value_query) +{ + return !is_string_query(value_query); +} + +int +parse_and_run_number_value_query(char *num_val_query, pdc_c_var_type_t num_type, + num_query_action_collection_t *action_collection, void *cb_input, + uint64_t *cb_out_len, void **cb_out) +{ + // allocate memory according to the val_idx_dtype for value 1 and value 2. + void *val1; + void *val2; + if (startsWith(num_val_query, "|") && startsWith(num_val_query, "|")) { // EXACT + // exact number search + char * num_str = substring(num_val_query, 1, strlen(num_val_query) - 1); + size_t klen1 = get_number_from_string(num_str, num_type, &val1); + + action_collection->exact_action(val1, NULL, NULL, 1, 1, num_type, cb_input, cb_out, cb_out_len); + + // value_index_leaf_content_t *value_index_leaf = NULL; + // rbt_find(leafcnt->primary_rbt, val1, klen1, (void **)&value_index_leaf); + // if (value_index_leaf != NULL) { + // collect_obj_ids(value_index_leaf, idx_record); + // } + } + else if (startsWith(num_val_query, "~")) { // LESS THAN + int endInclusive = num_val_query[1] == '|'; + // find all numbers that are smaller than the given number + int beginPos = endInclusive ? 
2 : 1; + char * numstr = substring(num_val_query, beginPos, strlen(num_val_query)); + size_t klen1 = get_number_from_string(numstr, num_type, &val1); + action_collection->lt_action(NULL, NULL, val1, 0, endInclusive, num_type, cb_input, cb_out, + cb_out_len); + + // rbt_range_lt(leafcnt->primary_rbt, val1, klen1, value_rbt_callback, idx_record, endInclusive); + } + else if (endsWith(num_val_query, "~")) { // GEATER THAN + int beginInclusive = num_val_query[strlen(num_val_query) - 2] == '|'; + int endPos = beginInclusive ? strlen(num_val_query) - 2 : strlen(num_val_query) - 1; + // find all numbers that are greater than the given number + char * numstr = substring(num_val_query, 0, endPos); + size_t klen1 = get_number_from_string(numstr, num_type, &val1); + + action_collection->gt_action(NULL, val1, NULL, beginInclusive, 0, num_type, cb_input, cb_out, + cb_out_len); + // rbt_range_gt(leafcnt->primary_rbt, val1, klen1, value_rbt_callback, idx_record, beginInclusive); + } + else if (contains(num_val_query, "~")) { // BETWEEN + int num_tokens = 0; + char **tokens = NULL; + // the string is not ended or started with '~', and if it contains '~', it is a in-between query. + split_string(num_val_query, "~", &tokens, &num_tokens); + if (num_tokens != 2) { + printf("ERROR: invalid range query: %s\n", num_val_query); + return -1; + } + char *lo_tok = tokens[0]; + char *hi_tok = tokens[1]; + // lo_tok might be ended with '|', and hi_tok might be started with '|', to indicate inclusivity. + int beginInclusive = endsWith(lo_tok, "|"); + int endInclusive = startsWith(hi_tok, "|"); + char * lo_num_str = beginInclusive ? substring(lo_tok, 0, strlen(lo_tok) - 1) : lo_tok; + char * hi_num_str = endInclusive ? 
substring(hi_tok, 1, strlen(hi_tok)) : hi_tok; + size_t klen1 = get_number_from_string(lo_num_str, num_type, &val1); + size_t klen2 = get_number_from_string(hi_num_str, num_type, &val2); + + action_collection->between_action(NULL, val1, val2, beginInclusive, endInclusive, num_type, cb_input, + cb_out, cb_out_len); + // int num_visited_node = rbt_range_walk(leafcnt->primary_rbt, val1, klen1, val2, klen2, + // value_rbt_callback, idx_record, beginInclusive, + // endInclusive); + // println("[value_number_query] num_visited_node: %d\n", num_visited_node); + } + else { + // exact query by default + // exact number search + char * num_str = strdup(num_val_query); + size_t klen1 = get_number_from_string(num_str, num_type, &val1); + + action_collection->exact_action(val1, NULL, NULL, 1, 1, num_type, cb_input, cb_out, cb_out_len); + // value_index_leaf_content_t *value_index_leaf = NULL; + // rbt_find(leafcnt->primary_rbt, val1, klen1, (void **)&value_index_leaf); + // if (value_index_leaf != NULL) { + // collect_obj_ids(value_index_leaf, idx_record); + // } + // free(num_str); + } + return 0; +} \ No newline at end of file diff --git a/src/commons/utils/string_utils.c b/src/commons/utils/string_utils.c index 546d5679d..d12ea6daa 100644 --- a/src/commons/utils/string_utils.c +++ b/src/commons/utils/string_utils.c @@ -327,4 +327,35 @@ gen_random_strings(int count, int minlen, int maxlen, int alphabet_size) result[c] = str; } return result; +} + +int +is_quoted_string(char *token) +{ + if (startsWith(token, "\"") || endsWith(token, "\"")) { + return 1; + } + return 0; +} + +/** + * remove the quotes from a string + */ +char * +stripQuotes(const char *str) +{ + if (str == NULL) { + return NULL; + } + + int len = strlen(str); + if (len >= 2 && str[0] == '"' && str[len - 1] == '"') { + // Call substring to remove the first and last character + char *stripped = substring(str, 1, len - 1); + return stripped; + } + else { + // No quotes to strip, return a copy of the original 
string + return strdup(str); // strdup allocates memory for the copy + } } \ No newline at end of file diff --git a/src/server/CMakeLists.txt b/src/server/CMakeLists.txt index 5653602d9..0d7bae543 100644 --- a/src/server/CMakeLists.txt +++ b/src/server/CMakeLists.txt @@ -56,7 +56,7 @@ add_library(pdc_server_lib pdc_client_server_common.c dablooms/pdc_dablooms.c dablooms/pdc_murmur.c - pdc_hash-table.c + # pdc_hash-table.c ${PDC_SOURCE_DIR}/src/server/pdc_server_analysis/pdc_server_analysis.c ${PDC_SOURCE_DIR}/src/server/pdc_server_region/pdc_server_data.c ${PDC_SOURCE_DIR}/src/server/pdc_server_region/pdc_server_region_cache.c diff --git a/src/server/include/pdc_client_server_common.h b/src/server/include/pdc_client_server_common.h index 98edcacdc..7f1f7f3ff 100644 --- a/src/server/include/pdc_client_server_common.h +++ b/src/server/include/pdc_client_server_common.h @@ -64,7 +64,6 @@ hg_thread_mutex_t meta_obj_map_mutex_g; #define PDC_SEQ_ID_INIT_VALUE 1000 #define PDC_UPDATE_CACHE 111 #define PDC_UPDATE_STORAGE 101 -#define DART_ALPHABET_SIZE 27 #ifndef HOST_NAME_MAX #if defined(__APPLE__) @@ -1153,15 +1152,20 @@ typedef struct { /* Define dart_perform_one_server_in_t */ typedef struct { - int8_t op_type; - int8_t hash_algo; - hg_const_string_t attr_key; - hg_const_string_t attr_val; - int8_t obj_ref_type; - uint64_t obj_primary_ref; - uint64_t obj_secondary_ref; - uint64_t obj_server_ref; - int64_t timestamp; + int8_t op_type; + int8_t hash_algo; + char * attr_key; + uint32_t attr_vsize; + uint8_t attr_vtype; + void * attr_val; + uint64_t vnode_id; + int8_t obj_ref_type; + uint64_t obj_primary_ref; + uint64_t obj_secondary_ref; + uint64_t obj_server_ref; + int8_t inserting_suffix; + int64_t timestamp; + uint32_t src_client_id; } dart_perform_one_server_in_t; /* Define dart_perform_one_server_out_t */ @@ -1206,7 +1210,6 @@ hg_proc_pdc_kvtag_t(hg_proc_t proc, void *data) if (struct_data->size) { switch (hg_proc_get_op(proc)) { case HG_DECODE: - struct_data->value 
= malloc(struct_data->size); /* HG_FALLTHROUGH(); */ /* FALLTHRU */ @@ -3851,12 +3854,22 @@ hg_proc_dart_perform_one_server_in_t(hg_proc_t proc, void *data) // HG_LOG_ERROR("Proc error"); return ret; } - ret = hg_proc_hg_const_string_t(proc, &struct_data->attr_key); + ret = hg_proc_hg_string_t(proc, &struct_data->attr_key); + if (ret != HG_SUCCESS) { + // HG_LOG_ERROR("Proc error"); + return ret; + } + ret = hg_proc_uint32_t(proc, &struct_data->attr_vsize); if (ret != HG_SUCCESS) { // HG_LOG_ERROR("Proc error"); return ret; } - ret = hg_proc_hg_const_string_t(proc, &struct_data->attr_val); + ret = hg_proc_uint8_t(proc, &struct_data->attr_vtype); + if (ret != HG_SUCCESS) { + // HG_LOG_ERROR("Proc error"); + return ret; + } + ret = hg_proc_uint64_t(proc, &struct_data->vnode_id); if (ret != HG_SUCCESS) { // HG_LOG_ERROR("Proc error"); return ret; @@ -3881,12 +3894,36 @@ hg_proc_dart_perform_one_server_in_t(hg_proc_t proc, void *data) // HG_LOG_ERROR("Proc error"); return ret; } - + ret = hg_proc_int8_t(proc, &struct_data->inserting_suffix); + if (ret != HG_SUCCESS) { + // HG_LOG_ERROR("Proc error"); + return ret; + } ret = hg_proc_int64_t(proc, &struct_data->timestamp); if (ret != HG_SUCCESS) { // HG_LOG_ERROR("Proc error"); return ret; } + ret = hg_proc_uint32_t(proc, &struct_data->src_client_id); + if (ret != HG_SUCCESS) { + // HG_LOG_ERROR("Proc error"); + return ret; + } + if (struct_data->attr_vsize) { + switch (hg_proc_get_op(proc)) { + case HG_DECODE: + struct_data->attr_val = malloc(struct_data->attr_vsize); + /* HG_FALLTHROUGH(); */ + /* FALLTHRU */ + case HG_ENCODE: + ret = hg_proc_raw(proc, struct_data->attr_val, struct_data->attr_vsize); + break; + case HG_FREE: + free(struct_data->attr_val); + default: + break; + } + } return ret; } diff --git a/src/server/include/pdc_hash-table.h b/src/server/include/pdc_hash-table.h deleted file mode 100644 index 76474d33d..000000000 --- a/src/server/include/pdc_hash-table.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - 
-Copyright (c) 2005-2008, Simon Howard - -Permission to use, copy, modify, and/or distribute this software -for any purpose with or without fee is hereby granted, provided -that the above copyright notice and this permission notice appear -in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - */ - -/** - * @file hash-table.h - * - * @brief Hash table. - * - * A hash table stores a set of values which can be addressed by a - * key. Given the key, the corresponding value can be looked up - * quickly. - * - * To create a hash table, use @ref hash_table_new. To destroy a - * hash table, use @ref hash_table_free. - * - * To insert a value into a hash table, use @ref hash_table_insert. - * - * To remove a value from a hash table, use @ref hash_table_remove. - * - * To look up a value by its key, use @ref hash_table_lookup. - * - * To iterate over all values in a hash table, use - * @ref hash_table_iterate to initialise a @ref HashTableIterator - * structure. Each value can then be read in turn using - * @ref hash_table_iter_next and @ref hash_table_iter_has_more. - */ - -#ifndef ALGORITHM_HASH_TABLE_H -#define ALGORITHM_HASH_TABLE_H - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * A hash table structure. - */ - -typedef struct _HashTable HashTable; - -/** - * Structure used to iterate over a hash table. - */ - -typedef struct _HashTableIterator HashTableIterator; - -/** - * Internal structure representing an entry in a hash table. 
- */ - -typedef struct _HashTableEntry HashTableEntry; - -/** - * A key to look up a value in a @ref HashTable. - */ - -typedef void *HashTableKey; - -/** - * A value stored in a @ref HashTable. - */ - -typedef void *HashTableValue; - -/** - * Internal structure representing an entry in hash table - * used as @ref HashTableIterator next result. - */ - -typedef struct _HashTablePair { - HashTableKey key; - HashTableValue value; -} HashTablePair; - -/** - * Definition of a @ref HashTableIterator. - */ - -struct _HashTableIterator { - HashTable * hash_table; - HashTableEntry *next_entry; - unsigned int next_chain; -}; - -/** - * A null @ref HashTableValue. - */ - -#define HASH_TABLE_NULL ((void *)0) - -/** - * Hash function used to generate hash values for keys used in a hash - * table. - * - * @param value The value to generate a hash value for. - * @return The hash value. - */ - -typedef unsigned int (*HashTableHashFunc)(HashTableKey value); - -/** - * Function used to compare two keys for equality. - * - * @return Non-zero if the two keys are equal, zero if the keys are - * not equal. - */ - -typedef int (*HashTableEqualFunc)(HashTableKey value1, HashTableKey value2); - -/** - * Type of function used to free keys when entries are removed from a - * hash table. - */ - -typedef void (*HashTableKeyFreeFunc)(HashTableKey value); - -/** - * Type of function used to free values when entries are removed from a - * hash table. - */ - -typedef void (*HashTableValueFreeFunc)(HashTableValue value); - -/** - * Create a new hash table. - * - * @param hash_func Function used to generate hash keys for the - * keys used in the table. - * @param equal_func Function used to test keys used in the table - * for equality. - * @return A new hash table structure, or NULL if it - * was not possible to allocate the new hash - * table. - */ - -HashTable *hash_table_new(HashTableHashFunc hash_func, HashTableEqualFunc equal_func); - -/** - * Destroy a hash table. 
- * - * @param hash_table The hash table to destroy. - */ - -void hash_table_free(HashTable *hash_table); - -/** - * Register functions used to free the key and value when an entry is - * removed from a hash table. - * - * @param hash_table The hash table. - * @param key_free_func Function used to free keys. - * @param value_free_func Function used to free values. - */ - -void hash_table_register_free_functions(HashTable *hash_table, HashTableKeyFreeFunc key_free_func, - HashTableValueFreeFunc value_free_func); - -/** - * Insert a value into a hash table, overwriting any existing entry - * using the same key. - * - * @param hash_table The hash table. - * @param key The key for the new value. - * @param value The value to insert. - * @return Non-zero if the value was added successfully, - * or zero if it was not possible to allocate - * memory for the new entry. - */ - -int hash_table_insert(HashTable *hash_table, HashTableKey key, HashTableValue value); - -/** - * Look up a value in a hash table by key. - * - * @param hash_table The hash table. - * @param key The key of the value to look up. - * @return The value, or @ref HASH_TABLE_NULL if there - * is no value with that key in the hash table. - */ - -HashTableValue hash_table_lookup(HashTable *hash_table, HashTableKey key); - -/** - * Remove a value from a hash table. - * - * @param hash_table The hash table. - * @param key The key of the value to remove. - * @return Non-zero if a key was removed, or zero if the - * specified key was not found in the hash table. - */ - -int hash_table_remove(HashTable *hash_table, HashTableKey key); - -/** - * Retrieve the number of entries in a hash table. - * - * @param hash_table The hash table. - * @return The number of entries in the hash table. - */ - -unsigned int hash_table_num_entries(HashTable *hash_table); - -/** - * Initialise a @ref HashTableIterator to iterate over a hash table. - * - * @param hash_table The hash table. 
- * @param iter Pointer to an iterator structure to - * initialise. - */ - -void hash_table_iterate(HashTable *hash_table, HashTableIterator *iter); - -/** - * Determine if there are more keys in the hash table to iterate - * over. - * - * @param iterator The hash table iterator. - * @return Zero if there are no more values to iterate - * over, non-zero if there are more values to - * iterate over. - */ - -int hash_table_iter_has_more(HashTableIterator *iterator); - -/** - * Using a hash table iterator, retrieve the next @ref HashTablePair. - * - * Note: To avoid @ref HashTableEntry internal @ref HashTablePair - * from being tampered with, and potentially messing with - * internal table structure, the function returns a copy - * of @ref HashTablePair stored internally. - * - * @param iterator The hash table iterator. - * @return The next @ref HashTablePair from the hash - * table, or @ref HASH_TABLE_NULL of Key and - * Value if there are no more keys to iterate - * over. - */ - -HashTablePair hash_table_iter_next(HashTableIterator *iterator); - -#ifdef __cplusplus -} -#endif - -#endif /* #ifndef ALGORITHM_HASH_TABLE_H */ diff --git a/src/server/include/pdc_server.h b/src/server/include/pdc_server.h index f2fca93d2..a1af90004 100644 --- a/src/server/include/pdc_server.h +++ b/src/server/include/pdc_server.h @@ -34,7 +34,7 @@ #include "mercury_atomic.h" #include "mercury_list.h" -#include "pdc_hash-table.h" +#include "pdc_hash_table.h" #include "pdc_client_server_common.h" #include "pdc_server_common.h" diff --git a/src/server/include/pdc_server_metadata.h b/src/server/include/pdc_server_metadata.h index de3c15c80..c41e27b12 100644 --- a/src/server/include/pdc_server_metadata.h +++ b/src/server/include/pdc_server_metadata.h @@ -32,7 +32,7 @@ #include "mercury_proc_string.h" #include "mercury_atomic.h" -#include "pdc_hash-table.h" +#include "pdc_hash_table.h" #include "pdc_server_common.h" #include "pdc_client_server_common.h" diff --git 
a/src/server/include/pdc_server_metadata_index.h b/src/server/include/pdc_server_metadata_index.h index d5360539c..5659dd14f 100644 --- a/src/server/include/pdc_server_metadata_index.h +++ b/src/server/include/pdc_server_metadata_index.h @@ -11,57 +11,15 @@ #include "pdc_hash.h" #include "pdc_compare.h" #include "dart_core.h" -#include "pdc_hash-table.h" - -typedef struct { - // On the leaf of ART, we maintain a hash table of IDs of all objects containing that key. - HashTable *server_id_obj_id_table; - - dart_indexed_value_type_t data_type; - // Also, for key lookup ART, we also maintain the pointer to the value tree - void *extra_prefix_index; - void *extra_suffix_index; - void *extra_range_index; - void *extra_infix_index; -} key_index_leaf_content; - -typedef struct pdc_art_iterator_param { - char * query_str; - char * level_one_infix; - char * level_two_infix; - uint32_t total_count; - Set * out; -} pdc_art_iterator_param_t; +#include "pdc_hash_table.h" +#include "bin_file_ops.h" /** - * @brief Initialize the ART index + * @brief Initialize local index + * @param num_server The number of servers + * @param server_id The server ID */ -void PDC_Server_dart_init(); - -// /** -// * @brief Create the metadata index -// * @param in [IN] Input parameters for the create operation -// * @param out [OUT] Output parameters for the create operation -// * @return perr_t SUCCESS on success, FAIL on failure -// */ -// perr_t PDC_Server_metadata_index_create(metadata_index_create_in_t *in, metadata_index_create_out_t *out); - -// /** -// * @brief Delete the metadata index -// * @param in [IN] Input parameters for the delete operation -// * @param out [OUT] Output parameters for the delete operation -// * @return perr_t SUCCESS on success, FAIL on failure -// */ -// perr_t PDC_Server_metadata_index_delete(metadata_index_delete_in_t *in, metadata_index_delete_out_t *out); - -// /** -// * @brief Search the metadata index -// * @param in [IN] Input parameters for the search 
operation -// * @param out [OUT] Output parameters for the search operation -// * @return perr_t SUCCESS on success, FAIL on failure -// */ -// perr_t PDC_Server_metadata_index_search(metadata_index_search_in_t *in, metadata_index_search_out_t *out, -// uint64_t *n_obj_ids_ptr, uint64_t **buf_ptrs); +void PDC_Server_metadata_index_init(uint32_t num_server, uint32_t server_id); /** * @brief Get the server information for the metadata index @@ -80,5 +38,20 @@ perr_t PDC_Server_dart_get_server_info(dart_get_server_info_in_t *in, dart_get_s perr_t PDC_Server_dart_perform_one_server(dart_perform_one_server_in_t * in, dart_perform_one_server_out_t *out, uint64_t *n_obj_ids_ptr, uint64_t **buf_ptrs); +/** + * @brief Dumping the index to a file. + * @param checkpoint_dir The directory path to store the index file. + * @param serverID The server ID. + * @return perr_t SUCCESS on success, FAIL on failure + */ +perr_t metadata_index_dump(char *checkpoint_dir, uint32_t serverID); + +/** + * @brief Recovering the index from a file. Please initialize idioms before calling this function. + * @param checkpiont_dir The directory path to store the index file. + * @param num_server The number of servers. + * @param serverID The server ID. 
+ */ +perr_t metadata_index_recover(char *checkpiont_dir, int num_server, uint32_t serverID); #endif /* PDC_SERVER_METADATA_INDEX_H */ \ No newline at end of file diff --git a/src/server/pdc_client_server_common.c b/src/server/pdc_client_server_common.c index d90f83bff..89a9f8cfe 100644 --- a/src/server/pdc_client_server_common.c +++ b/src/server/pdc_client_server_common.c @@ -6450,6 +6450,8 @@ HG_TEST_RPC_CB(dart_perform_one_server, handle) stopwatch_t server_timer; timer_start(&server_timer); + // printf("==PDC_SERVER: dart_perform_one_server_cb(): key = %s\n", in.attr_key); + PDC_Server_dart_perform_one_server(&in, &out, n_obj_ids_ptr, buf_ptrs); timer_pause(&server_timer); diff --git a/src/server/pdc_hash-table.c b/src/server/pdc_hash-table.c deleted file mode 100644 index f4804a24a..000000000 --- a/src/server/pdc_hash-table.c +++ /dev/null @@ -1,476 +0,0 @@ -/* - -Copyright (c) 2005-2008, Simon Howard - -Permission to use, copy, modify, and/or distribute this software -for any purpose with or without fee is hereby granted, provided -that the above copyright notice and this permission notice appear -in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM -LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- - */ - -/* Hash table implementation */ - -#include -#include -#include "pdc_hash-table.h" - -#ifdef ALLOC_TESTING -#include "alloc-testing.h" -#endif - -struct _HashTableEntry { - HashTablePair pair; - HashTableEntry *next; -}; - -struct _HashTable { - HashTableEntry ** table; - unsigned int table_size; - HashTableHashFunc hash_func; - HashTableEqualFunc equal_func; - HashTableKeyFreeFunc key_free_func; - HashTableValueFreeFunc value_free_func; - unsigned int entries; - unsigned int prime_index; -}; - -/* This is a set of good hash table prime numbers, from: - * http://planetmath.org/encyclopedia/GoodHashTablePrimes.html - * Each prime is roughly double the previous value, and as far as - * possible from the nearest powers of two. */ - -static const unsigned int hash_table_primes[] = { - 193, 389, 769, 1543, 3079, 6151, 12289, 24593, - 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, - 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, -}; - -static const unsigned int hash_table_num_primes = sizeof(hash_table_primes) / sizeof(int); - -/* Internal function used to allocate the table on hash table creation - * and when enlarging the table */ - -static int -hash_table_allocate_table(HashTable *hash_table) -{ - unsigned int new_table_size; - - /* Determine the table size based on the current prime index. - * An attempt is made here to ensure sensible behavior if the - * maximum prime is exceeded, but in practice other things are - * likely to break long before that happens. 
*/ - - if (hash_table->prime_index < hash_table_num_primes) { - new_table_size = hash_table_primes[hash_table->prime_index]; - } - else { - new_table_size = hash_table->entries * 10; - } - - hash_table->table_size = new_table_size; - - /* Allocate the table and initialise to NULL for all entries */ - hash_table->table = calloc(hash_table->table_size, sizeof(HashTableEntry *)); - - return hash_table->table != NULL; -} - -/* Free an entry, calling the free functions if there are any registered */ -static void -hash_table_free_entry(HashTable *hash_table, HashTableEntry *entry) -{ - HashTablePair *pair; - - pair = &(entry->pair); - - /* If there is a function registered for freeing keys, use it to free - * the key */ - if (hash_table->key_free_func != NULL) { - hash_table->key_free_func(pair->key); - } - - /* Likewise with the value */ - if (hash_table->value_free_func != NULL) { - hash_table->value_free_func(pair->value); - } - - /* Free the data structure */ - free(entry); -} - -HashTable * -hash_table_new(HashTableHashFunc hash_func, HashTableEqualFunc equal_func) -{ - HashTable *hash_table; - - /* Allocate a new hash table structure */ - hash_table = (HashTable *)malloc(sizeof(HashTable)); - - if (hash_table == NULL) { - return NULL; - } - - hash_table->hash_func = hash_func; - hash_table->equal_func = equal_func; - hash_table->key_free_func = NULL; - hash_table->value_free_func = NULL; - hash_table->entries = 0; - hash_table->prime_index = 0; - - /* Allocate the table */ - if (!hash_table_allocate_table(hash_table)) { - free(hash_table); - - return NULL; - } - - return hash_table; -} - -void -hash_table_free(HashTable *hash_table) -{ - HashTableEntry *rover; - HashTableEntry *next; - unsigned int i; - - /* Free all entries in all chains */ - for (i = 0; i < hash_table->table_size; ++i) { - rover = hash_table->table[i]; - while (rover != NULL) { - next = rover->next; - hash_table_free_entry(hash_table, rover); - rover = next; - } - } - - /* Free the table */ - 
free(hash_table->table); - - /* Free the hash table structure */ - free(hash_table); -} - -void -hash_table_register_free_functions(HashTable *hash_table, HashTableKeyFreeFunc key_free_func, - HashTableValueFreeFunc value_free_func) -{ - hash_table->key_free_func = key_free_func; - hash_table->value_free_func = value_free_func; -} - -static int -hash_table_enlarge(HashTable *hash_table) -{ - HashTableEntry **old_table; - unsigned int old_table_size; - unsigned int old_prime_index; - HashTableEntry * rover; - HashTablePair * pair; - HashTableEntry * next; - unsigned int index; - unsigned int i; - - /* Store a copy of the old table */ - old_table = hash_table->table; - old_table_size = hash_table->table_size; - old_prime_index = hash_table->prime_index; - - /* Allocate a new, larger table */ - - ++hash_table->prime_index; - - if (!hash_table_allocate_table(hash_table)) { - - /* Failed to allocate the new table */ - hash_table->table = old_table; - hash_table->table_size = old_table_size; - hash_table->prime_index = old_prime_index; - - return 0; - } - - /* Link all entries from all chains into the new table */ - for (i = 0; i < old_table_size; ++i) { - rover = old_table[i]; - - while (rover != NULL) { - next = rover->next; - - /* Fetch rover HashTablePair */ - pair = &(rover->pair); - - /* Find the index into the new table */ - index = hash_table->hash_func(pair->key) % hash_table->table_size; - - /* Link this entry into the chain */ - rover->next = hash_table->table[index]; - hash_table->table[index] = rover; - - /* Advance to next in the chain */ - rover = next; - } - } - - /* Free the old table */ - free(old_table); - - return 1; -} - -int -hash_table_insert(HashTable *hash_table, HashTableKey key, HashTableValue value) -{ - HashTableEntry *rover; - HashTablePair * pair; - HashTableEntry *newentry; - unsigned int index; - - /* If there are too many items in the table with respect to the table - * size, the number of hash collisions increases and performance - * 
decreases. Enlarge the table size to prevent this happening */ - if ((hash_table->entries * 3) / hash_table->table_size > 0) { - - /* Table is more than 1/3 full */ - if (!hash_table_enlarge(hash_table)) { - /* Failed to enlarge the table */ - return 0; - } - } - - /* Generate the hash of the key and hence the index into the table */ - index = hash_table->hash_func(key) % hash_table->table_size; - - /* Traverse the chain at this location and look for an existing - * entry with the same key */ - rover = hash_table->table[index]; - - while (rover != NULL) { - /* Fetch rover's HashTablePair entry */ - pair = &(rover->pair); - - if (hash_table->equal_func(pair->key, key) != 0) { - - /* Same key: overwrite this entry with new data */ - - /* If there is a value free function, free the old data - * before adding in the new data */ - if (hash_table->value_free_func != NULL) { - hash_table->value_free_func(pair->value); - } - - /* Same with the key: use the new key value and free - * the old one */ - if (hash_table->key_free_func != NULL) { - hash_table->key_free_func(pair->key); - } - - pair->key = key; - pair->value = value; - - /* Finished */ - return 1; - } - - rover = rover->next; - } - - /* Not in the hash table yet. 
Create a new entry */ - newentry = (HashTableEntry *)malloc(sizeof(HashTableEntry)); - - if (newentry == NULL) { - return 0; - } - newentry->pair.key = key; - newentry->pair.value = value; - - /* Link into the list */ - newentry->next = hash_table->table[index]; -#ifdef ENABLE_MULTITHREAD - hg_thread_mutex_lock(&hash_table_new_mutex_g); -#endif - hash_table->table[index] = newentry; - - /* Maintain the count of the number of entries */ - ++hash_table->entries; - - /* Added successfully */ -#ifdef ENABLE_MULTITHREAD - hg_thread_mutex_unlock(&hash_table_new_mutex_g); -#endif - - return 1; -} - -HashTableValue -hash_table_lookup(HashTable *hash_table, HashTableKey key) -{ - HashTableEntry *rover; - HashTablePair * pair; - unsigned int index; - - /* Generate the hash of the key and hence the index into the table */ - index = hash_table->hash_func(key) % hash_table->table_size; - - /* Walk the chain at this index until the corresponding entry is - * found */ - - rover = hash_table->table[index]; - - while (rover != NULL) { - pair = &(rover->pair); - if (hash_table->equal_func(key, pair->key) != 0) { - - /* Found the entry. Return the data. */ - return pair->value; - } - - rover = rover->next; - } - - /* Not found */ - return HASH_TABLE_NULL; -} - -int -hash_table_remove(HashTable *hash_table, HashTableKey key) -{ - HashTableEntry **rover; - HashTableEntry * entry; - HashTablePair * pair; - unsigned int index; - int result; - - /* Generate the hash of the key and hence the index into the table */ - - index = hash_table->hash_func(key) % hash_table->table_size; - - /* Rover points at the pointer which points at the current entry - * in the chain being inspected. ie. the entry in the table, or - * the "next" pointer of the previous entry in the chain. This - * allows us to unlink the entry when we find it. 
*/ - - result = 0; - rover = &hash_table->table[index]; - - while (*rover != NULL) { - - pair = &((*rover)->pair); - - if (hash_table->equal_func(key, pair->key) != 0) { - - /* This is the entry to remove */ - entry = *rover; - - /* Unlink from the list */ - *rover = entry->next; - - /* Destroy the entry structure */ - hash_table_free_entry(hash_table, entry); - - /* Track count of entries */ - --hash_table->entries; - - result = 1; - - break; - } - - /* Advance to the next entry */ - rover = &((*rover)->next); - } - - return result; -} - -unsigned int -hash_table_num_entries(HashTable *hash_table) -{ - return hash_table->entries; -} - -void -hash_table_iterate(HashTable *hash_table, HashTableIterator *iterator) -{ - unsigned int chain; - - iterator->hash_table = hash_table; - - /* Default value of next if no entries are found. */ - iterator->next_entry = NULL; - - /* Find the first entry */ - for (chain = 0; chain < hash_table->table_size; ++chain) { - - if (hash_table->table[chain] != NULL) { - iterator->next_entry = hash_table->table[chain]; - iterator->next_chain = chain; - break; - } - } -} - -int -hash_table_iter_has_more(HashTableIterator *iterator) -{ - return iterator->next_entry != NULL; -} - -HashTablePair -hash_table_iter_next(HashTableIterator *iterator) -{ - HashTableEntry *current_entry; - HashTable * hash_table; - HashTablePair pair = {NULL, NULL}; - unsigned int chain; - - hash_table = iterator->hash_table; - - if (iterator->next_entry == NULL) { - return pair; - } - - /* Result is immediately available */ - current_entry = iterator->next_entry; - pair = current_entry->pair; - - /* Find the next entry */ - if (current_entry->next != NULL) { - - /* Next entry in current chain */ - iterator->next_entry = current_entry->next; - } - else { - /* None left in this chain, so advance to the next chain */ - chain = iterator->next_chain + 1; - - /* Default value if no next chain found */ - iterator->next_entry = NULL; - - while (chain < 
hash_table->table_size) { - - /* Is there anything in this chain? */ - if (hash_table->table[chain] != NULL) { - iterator->next_entry = hash_table->table[chain]; - break; - } - - /* Try the next chain */ - ++chain; - } - - iterator->next_chain = chain; - } - - return pair; -} diff --git a/src/server/pdc_server.c b/src/server/pdc_server.c index d73fd9b9f..8260f520b 100644 --- a/src/server/pdc_server.c +++ b/src/server/pdc_server.c @@ -46,7 +46,7 @@ #include "pdc_config.h" #include "pdc_utlist.h" -#include "pdc_hash-table.h" +#include "pdc_hash_table.h" #include "pdc_interface.h" #include "pdc_analysis_pkg.h" #include "pdc_client_server_common.h" @@ -971,6 +971,7 @@ PDC_Server_init(int port, hg_class_t **hg_class, hg_context_t **hg_context) printf("==PDC_SERVER[%d]: error with PDC_Server_restart\n", pdc_server_rank_g); goto done; } + metadata_index_recover(pdc_server_tmp_dir_g, pdc_server_size_g, pdc_server_rank_g); } else { // We are starting a brand new server @@ -997,8 +998,8 @@ PDC_Server_init(int port, hg_class_t **hg_class, hg_context_t **hg_context) n_metadata_g = 0; - // Initialize DART - PDC_Server_dart_init(); + // Initialize IDIOMS + PDC_Server_metadata_index_init(pdc_server_size_g, pdc_server_rank_g); // PDC transfer_request infrastructures PDC_server_transfer_request_init(); @@ -1437,6 +1438,8 @@ PDC_Server_checkpoint() fflush(stdout); } + metadata_index_dump(pdc_server_tmp_dir_g, pdc_server_rank_g); + done: fflush(stdout); FUNC_LEAVE(ret_value); diff --git a/src/server/pdc_server_metadata.c b/src/server/pdc_server_metadata.c index 02380fcc8..f91545a89 100644 --- a/src/server/pdc_server_metadata.c +++ b/src/server/pdc_server_metadata.c @@ -42,7 +42,7 @@ #endif #include "pdc_utlist.h" -#include "pdc_hash-table.h" +#include "pdc_hash_table.h" #include "pdc_dablooms.h" #include "pdc_interface.h" #include "pdc_client_server_common.h" @@ -1593,6 +1593,46 @@ PDC_Server_get_partial_query_result(metadata_query_transfer_in_t *in, uint32_t * FUNC_LEAVE(ret_value); 
} +void +num_query_action_someta(void *cond_exact, void *cond_lo, void *cond_hi, int lo_inclusive, int hi_inclusive, + pdc_c_var_type_t num_type, void *input, void **out, uint64_t *out_len) +{ + void * input_val = ((pdc_kvtag_t *)input)->value; + size_t input_size = ((pdc_kvtag_t *)input)->size; + libhl_cmp_callback_t cmp_func = LIBHL_CMP_CB(num_type); + *out_len = 1; + *out = calloc(1, sizeof(uint64_t)); + pbool_t ret_value = FALSE; + if (cond_exact != NULL) { // Exact + ret_value = cmp_func(input_val, input_size, cond_exact, get_size_by_dtype(num_type)) == 0; + } + else if (cond_lo == NULL && cond_hi != NULL) { // less than + ret_value = (hi_inclusive) + ? cmp_func(input_val, input_size, cond_hi, get_size_by_dtype(num_type)) <= 0 + : cmp_func(input_val, input_size, cond_hi, get_size_by_dtype(num_type)) < 0; + } + else if (cond_lo != NULL && cond_hi == NULL) { // greater than + ret_value = (lo_inclusive) + ? cmp_func(input_val, input_size, cond_lo, get_size_by_dtype(num_type)) >= 0 + : cmp_func(input_val, input_size, cond_lo, get_size_by_dtype(num_type)) > 0; + } + else if (cond_lo != NULL && cond_hi != NULL) { // between + pbool_t lo_rst = (lo_inclusive) + ? cmp_func(input_val, input_size, cond_lo, get_size_by_dtype(num_type)) >= 0 + : cmp_func(input_val, input_size, cond_lo, get_size_by_dtype(num_type)) > 0; + pbool_t hi_rst = (hi_inclusive) + ? 
cmp_func(input_val, input_size, cond_hi, get_size_by_dtype(num_type)) <= 0 + : cmp_func(input_val, input_size, cond_hi, get_size_by_dtype(num_type)) < 0; + ret_value = lo_rst && hi_rst; + } + else { + } + *((uint64_t *)(*out)) = (uint64_t)ret_value; +} + +num_query_action_collection_t soMetaNumQueryActions = {num_query_action_someta, num_query_action_someta, + num_query_action_someta, num_query_action_someta}; + pbool_t _is_matching_kvtag(pdc_kvtag_t *in, pdc_kvtag_t *kvtag) { @@ -1608,7 +1648,6 @@ _is_matching_kvtag(pdc_kvtag_t *in, pdc_kvtag_t *kvtag) return FALSE; } if (in->type == (int8_t)PDC_STRING) { - // FIXME: need to address kvtag->type serialization problem. char *pattern = (char *)in->value; if (!simple_matches(kvtag->value, pattern)) { return FALSE; @@ -1616,8 +1655,13 @@ _is_matching_kvtag(pdc_kvtag_t *in, pdc_kvtag_t *kvtag) } else { // FIXME: for all numeric types, we use memcmp to compare, for exact value query, but we also // have to support range query. - if (memcmp(in->value, kvtag->value, in->size) != 0) - return FALSE; + uint64_t *out; + uint64_t out_len; + parse_and_run_number_value_query(in->value, in->type, &soMetaNumQueryActions, kvtag, &out_len, + (void **)&out); + return (pbool_t)out[0]; + // if (memcmp(in->value, kvtag->value, in->size) != 0) + // return FALSE; } FUNC_LEAVE(ret_value); @@ -1881,6 +1925,17 @@ PDC_Server_get_kvtag_query_result(pdc_kvtag_t *in /*FIXME: query input should be *n_meta = 0; *obj_ids = (void *)calloc(alloc_size, sizeof(uint64_t)); + char *v_query = (char *)in->value; + printf("==PDC_SERVER[%d] before stripQuotes: Querying kvtag with key [%s], value [%s]\n", + pdc_server_rank_g, in->name, (char *)in->value); + if (is_string_query(v_query)) { + in->value = stripQuotes(v_query); + in->type = PDC_STRING; + } + + printf("==PDC_SERVER[%d] after stripQuotes: Querying kvtag with key [%s], value [%s]\n", + pdc_server_rank_g, in->name, (char *)in->value); + if (use_rocksdb_g == 1) { ret_value = 
PDC_Server_query_kvtag_rocksdb(in, n_meta, obj_ids, alloc_size); if (ret_value != SUCCEED) { @@ -2840,12 +2895,13 @@ PDC_Server_add_kvtag_sqlite3(metadata_add_kvtag_in_t *in, metadata_add_tag_out_t printf("==PDC_SERVER[%d]: error from SQLite %s!\n", pdc_server_rank_g, errMessage); else out->ret = 1; + +done: #else printf("==PDC_SERVER[%d]: enabled SQLite3 but PDC is not compiled with it!\n", pdc_server_rank_g); ret_value = FAIL; #endif -done: return ret_value; } diff --git a/src/server/pdc_server_metadata_index.c b/src/server/pdc_server_metadata_index.c index 1e91c42f7..2ae744b49 100644 --- a/src/server/pdc_server_metadata_index.c +++ b/src/server/pdc_server_metadata_index.c @@ -1,443 +1,26 @@ #include "pdc_server_metadata_index.h" +#include "idioms_local_index.h" +#include "idioms_persistence.h" -#define DART_SERVER_DEBUG 0 +art_tree *art_key_prefix_tree_g = NULL; +art_tree *art_key_suffix_tree_g = NULL; +size_t num_kv_pairs_loaded_mdb = 0; +size_t num_attrs_loaded_mdb = 0; -// DART search -int64_t indexed_word_count_g = 0; -int64_t server_request_count_g = 0; -int64_t request_per_unit_time_g = 0; -double unit_time_to_update_request = 5000.0; // ms. 
-art_tree *art_key_prefix_tree_g = NULL; -art_tree *art_key_suffix_tree_g = NULL; +uint32_t midx_server_id_g = 0; +uint32_t midx_num_server_g = 0; -// void -// create_hash_table_for_keyword(char *keyword, char *value, size_t len, void *data) -// { -// uint32_t hashVal = djb2_hash(keyword, (int)len); -// printf("%d:", hashVal); -// gen_obj_id_in_t in; -// gen_obj_id_out_t out; - -// in.data.obj_name = keyword; -// in.data.time_step = (int32_t)data; -// in.data.user_id = (uint32_t)data; -// char *taglist = (char *)calloc(256, sizeof(char)); -// printf("%s=%s", keyword, value); -// sprintf(taglist, "%s=%s", keyword, value); -// in.data.tags = taglist; -// in.data.data_location = " "; -// in.data.app_name = " "; -// in.data.ndim = 1; -// in.hash_value = hashVal; - -// PDC_insert_metadata_to_hash_table(&in, &out); -// } - -// int -// brutal_force_partial_search(metadata_query_transfer_in_t *in, uint32_t *n_meta, void ***buf_ptrs, -// char *k_query, char *vfrom_query, char *vto_query, uint32_t *hash_value) -// { -// int result = 0; - -// uint32_t iter = 0; -// HashTableIterator hash_table_iter; -// HashTableValue * head = NULL; -// pdc_metadata_t * elt; -// int n_entry; - -// if (metadata_hash_table_g != NULL) { -// if (hash_value != NULL) { -// head = hash_table_lookup(metadata_hash_table_g, hash_value); -// if (head != NULL) { -// DL_FOREACH(head->metadata, elt) -// { -// // List all objects, no need to check other constraints -// if (in->is_list_all == 1) { -// (*buf_ptrs)[iter++] = elt; -// } -// // check if current metadata matches search constraint -// else if (is_metadata_satisfy_constraint(elt, in) == 1) { -// (*buf_ptrs)[iter++] = elt; -// } -// } -// } -// } -// else { -// n_entry = hash_table_num_entries(metadata_hash_table_g); -// hash_table_iterate(metadata_hash_table_g, &hash_table_iter); - -// while (n_entry != 0 && hash_table_iter_has_more(&hash_table_iter)) { -// head = hash_table_iter_next(&hash_table_iter); -// DL_FOREACH(head->metadata, elt) -// { -// 
// List all objects, no need to check other constraints -// if (in->is_list_all == 1) { -// (*buf_ptrs)[iter++] = elt; -// } -// // check if current metadata matches search constraint -// else if (is_metadata_satisfy_constraint(elt, in) == 1) { -// (*buf_ptrs)[iter++] = elt; -// } -// } -// } -// } -// *n_meta = iter; - -// printf("==PDC_SERVER: brutal_force_partial_search: Total matching results: %d\n", *n_meta); -// result = 1; -// } // if (metadata_hash_table_g != NULL) -// else { -// printf("==PDC_SERVER: metadata_hash_table_g not initilized!\n"); -// result = 0; -// } - -// return result; -// } - -// void -// search_through_hash_table(char *k_query, uint32_t index_type, pattern_type_t pattern_type, -// pdc_art_iterator_param_t *param) -// { - -// metadata_query_transfer_in_t in; -// in.is_list_all = -1; -// in.user_id = -1; -// in.app_name = " "; -// in.obj_name = " "; -// in.time_step_from = -1; -// in.time_step_to = -1; -// in.ndim = -1; -// in.tags = " "; -// char * qType_string; -// uint32_t n_meta; -// void ** buf_ptrs; -// char * tok; - -// uint32_t *hash_ptr = NULL; -// uint32_t hash_value = -1; - -// switch (pattern_type) { -// case PATTERN_EXACT: -// qType_string = "Exact"; -// tok = k_query; -// if (index_type == 1) { -// hash_value = djb2_hash(tok, (int)strlen(tok)); -// hash_ptr = &hash_value; -// } -// else if (index_type == 2) { -// hash_value = djb2_hash(tok, 1); -// hash_ptr = &hash_value; -// } -// break; -// case PATTERN_PREFIX: -// qType_string = "Prefix"; -// tok = subrstr(k_query, strlen(k_query) - 1); -// if (index_type == 2) { -// hash_value = djb2_hash(tok, 1); -// hash_ptr = &hash_value; -// } -// else { -// hash_ptr = NULL; -// } -// break; -// case PATTERN_SUFFIX: -// qType_string = "Suffix"; -// tok = substr(k_query, 1); -// tok = reverse_str(tok); -// if (index_type == 2) { -// hash_value = djb2_hash(tok, 1); -// hash_ptr = &hash_value; -// } -// else { -// hash_ptr = NULL; -// } -// break; -// case PATTERN_MIDDLE: -// qType_string 
= "Infix"; -// tok = substring(k_query, 1, strlen(k_query) - 1); -// break; -// default: -// break; -// } - -// int search_rst = brutal_force_partial_search(&in, &n_meta, &buf_ptrs, k_query, NULL, NULL, hash_ptr); -// int i = 0; -// for (i = 0; i < n_meta; i++) { -// pdc_metadata_t *metadata = (pdc_metadata_t *)buf_ptrs[i]; -// hashset_add(param->out, (metadata->user_id)); -// param->total_count = param->total_count + 1; -// } -// } - -// void -// delete_hash_table_for_keyword(char *keyword, size_t len, void *data) -// { -// uint32_t hashVal = djb2_hash(keyword, (int)len); - -// metadata_delete_in_t in; -// metadata_delete_out_t out; - -// in.obj_name = keyword; -// in.time_step = (int32_t)data; -// in.hash_value = hashVal; - -// PDC_delete_metadata_from_hash_table(&in, &out); -// } +IDIOMS_t *idioms_g = NULL; /****************************/ /* Initialize DART */ /****************************/ void -PDC_Server_dart_init() +PDC_Server_metadata_index_init(uint32_t num_server, uint32_t server_id) { - - indexed_word_count_g = 0; - server_request_count_g = 0; - art_key_prefix_tree_g = (art_tree *)calloc(1, sizeof(art_tree)); - art_key_suffix_tree_g = (art_tree *)calloc(1, sizeof(art_tree)); - - art_tree_init(art_key_prefix_tree_g); - art_tree_init(art_key_suffix_tree_g); -} - -/****************************/ -/* Create index item for KV in DART */ -/****************************/ - -// #define PDC_DART_SFX_TREE - -perr_t -create_prefix_index_for_attr_value(void **index, unsigned char *attr_value, void *data) -{ - perr_t ret = SUCCEED; - if (*index == NULL) { - *index = (art_tree *)PDC_calloc(1, sizeof(art_tree)); - art_tree_init(*index); - } - - art_tree *art_value_prefix_tree = (art_tree *)*index; - - int len = strlen((const char *)attr_value); - Set *obj_id_set = (Set *)art_search(art_value_prefix_tree, attr_value, len); - if (obj_id_set == NULL) { - obj_id_set = set_new(ui64_hash, ui64_equal); - set_register_free_function(obj_id_set, free); - 
art_insert(art_value_prefix_tree, attr_value, len, (void *)obj_id_set); - } - - int indexed = set_insert(obj_id_set, data); - - if (indexed == -1) { - return FAIL; - } - - return ret; -} - -art_tree * -create_index_for_attr_name(char *attr_name, char *attr_value, void *data) -{ - - int len = strlen(attr_name); - key_index_leaf_content *leaf_content = NULL; - art_tree * nm_trie = NULL; - unsigned char * nm_key = NULL; - -#ifndef PDC_DART_SFX_TREE - int rr = 0; - for (rr = 0; rr < 2; rr++) { - nm_key = (rr == 1) ? (unsigned char *)reverse_str(attr_name) : (unsigned char *)attr_name; - nm_trie = (rr == 1) ? art_key_suffix_tree_g : art_key_prefix_tree_g; -#else - int sub_loop_count = len; // should be 'len', but we already iterate all suffixes at client side - nm_trie = art_key_prefix_tree_g; - for (int j = 0; j < sub_loop_count; j++) { - nm_key = (unsigned char *)substring(attr_name, j, len); -#endif - key_index_leaf_content *leafcnt = - (key_index_leaf_content *)art_search(nm_trie, nm_key, strlen((const char *)nm_key)); - if (leafcnt == NULL) { - leafcnt = (key_index_leaf_content *)PDC_calloc(1, sizeof(key_index_leaf_content)); - leafcnt->extra_prefix_index = (art_tree *)PDC_calloc(1, sizeof(art_tree)); - art_tree_init((art_tree *)leafcnt->extra_prefix_index); -#ifndef PDC_DART_SFX_TREE - // we only enable suffix index when suffix tree mode is off. - leafcnt->extra_suffix_index = (art_tree *)PDC_calloc(1, sizeof(art_tree)); - art_tree_init((art_tree *)leafcnt->extra_suffix_index); -#endif - // TODO: build local index for range query. - leafcnt->extra_range_index = (art_tree *)PDC_calloc(1, sizeof(art_tree)); - art_tree_init((art_tree *)leafcnt->extra_range_index); - - art_insert(nm_trie, nm_key, strlen((const char *)nm_key), leafcnt); - } - - art_tree *secondary_trie = NULL; - -#ifndef PDC_DART_SFX_TREE - int r = 0; - for (r = 0; r < 2; r++) { - unsigned char *val_key = - (r == 1 ? 
(unsigned char *)reverse_str(attr_value) : (unsigned char *)attr_value); - secondary_trie = (r == 1 ? (art_tree *)(leafcnt->extra_suffix_index) - : (art_tree *)(leafcnt->extra_prefix_index)); - -#else - secondary_trie = (art_tree *)(leafcnt->extra_prefix_index); - int val_len = strlen(attr_value); - for (int jj = 0; jj < val_len; jj++) { - unsigned char *val_key = (unsigned char *)substring(attr_value, jj, val_len); -#endif - create_prefix_index_for_attr_value((void **)&secondary_trie, val_key, data); - } // this matches with the 'r' loop or 'jj' loop - } // this matches with the 'rr' loop or 'j' loop - return nm_trie; -} - -perr_t -metadata_index_create(char *attr_key, char *attr_value, uint64_t obj_locator, int8_t index_type) -{ - perr_t ret_value = FAIL; - stopwatch_t timer; - timer_start(&timer); - uint64_t *data = (uint64_t *)calloc(1, sizeof(uint64_t)); - *data = obj_locator; - - // if (index_type == DHT_FULL_HASH) { - // FIXME: remember to check obj_locator type inside of this function below - // create_hash_table_for_keyword(attr_key, attr_value, strlen(attr_key), (void *)data); - // } - // else if (index_type == DHT_INITIAL_HASH) { - // FIXME: remember to check obj_locator type inside of this function below - // create_hash_table_for_keyword(attr_key, attr_value, 1, (void *)data); - // } - // else if (index_type == DART_HASH) { - create_index_for_attr_name(attr_key, attr_value, (void *)data); - // } - timer_pause(&timer); - // if (DART_SERVER_DEBUG) { - // printf("[Server_Side_Insert_%d] Timer to insert a keyword %s : %s into index = %.4f - // microseconds\n", - // pdc_server_rank_g, attr_key, attr_value, timer_delta_us(&timer)); - // } - indexed_word_count_g++; - ret_value = SUCCEED; - return ret_value; -} - -/****************************/ -/* Delete index item for KV in DART */ -/****************************/ - -perr_t -delete_prefix_index_for_attr_value(void **index, unsigned char *attr_value, void *data) -{ - perr_t ret = SUCCEED; - if (*index == NULL) 
{ - // println("The value prefix tree is NULL, there is nothing to delete."); - return ret; - } - - art_tree *art_value_prefix_tree = (art_tree *)*index; - - int len = strlen((const char *)attr_value); - Set *obj_id_set = (Set *)art_search(art_value_prefix_tree, attr_value, len); - if (obj_id_set == NULL) { - // println("The obj_id_set is NULL, there nothing more to delete."); - if (art_size(art_value_prefix_tree) == 0) { - art_tree_destroy(*index); - } - return ret; - } - - if (set_query(obj_id_set, data) != 0) { - set_remove(obj_id_set, data); - } - - if (set_num_entries(obj_id_set) == 0) { - art_delete(art_value_prefix_tree, attr_value, len); - set_free(obj_id_set); - } - return ret; -} - -void -delete_index_for_attr_name(char *attr_name, char *attr_value, void *data) -{ - int len = strlen(attr_name); - key_index_leaf_content *leaf_content = NULL; - art_tree * nm_trie = NULL; - unsigned char * nm_key = NULL; - -#ifndef PDC_DART_SFX_TREE - int rr = 0; - for (rr = 0; rr < 2; rr++) { - nm_key = rr == 1 ? (unsigned char *)reverse_str(attr_name) : (unsigned char *)attr_name; - nm_trie = rr == 1 ? art_key_suffix_tree_g : art_key_prefix_tree_g; -#else - int sub_loop_count = 1; // should be 'len', but we already iterate all suffixes at client side; - nm_trie = art_key_prefix_tree_g; - for (int j = 0; j < sub_loop_count; j++) { - nm_key = (unsigned char *)substring(attr_name, j, len); -#endif - key_index_leaf_content *leafcnt = - (key_index_leaf_content *)art_search(nm_trie, nm_key, strlen((const char *)nm_key)); - if (leafcnt == NULL) { - art_delete(nm_trie, nm_key, strlen((const char *)nm_key)); - } - else { - art_tree *secondary_trie = NULL; -#ifndef PDC_DART_SFX_TREE - int r = 0; - for (r = 0; r < 2; r++) { - secondary_trie = (r == 1 ? (art_tree *)(leafcnt->extra_suffix_index) - : (art_tree *)(leafcnt->extra_prefix_index)); - unsigned char *val_key = - (r == 1 ? 
(unsigned char *)reverse_str(attr_value) : (unsigned char *)attr_value); -#else - secondary_trie = (art_tree *)(leafcnt->extra_prefix_index); - for (int jj = 0; jj < strlen(attr_value); jj++) { - unsigned char *val_key = (unsigned char *)substring(attr_value, jj, strlen(attr_value)); -#endif - delete_prefix_index_for_attr_value((void **)&secondary_trie, val_key, data); - } - if (leafcnt->extra_suffix_index == NULL && leafcnt->extra_prefix_index == NULL) { - art_delete(nm_trie, nm_key, len); - leafcnt = NULL; - } - // TODO: deal with index for range query. - } // this matches with the 'r' loop or 'jj' loop - } // this matches with the 'rr' loop or 'j' loop -} - -perr_t -metadata_index_delete(char *attr_key, char *attr_value, uint64_t obj_locator, int8_t index_type) -{ - perr_t ret_value = FAIL; - stopwatch_t timer; - timer_start(&timer); - uint64_t *data = (uint64_t *)calloc(1, sizeof(uint64_t)); - *data = obj_locator; - - // if (index_type == DHT_FULL_HASH) { - // delete_hash_table_for_keyword(attr_key, strlen(attr_key), (void *)obj_locator); - // } - // else if (index_type == DHT_INITIAL_HASH) { - // delete_hash_table_for_keyword(attr_key, 1, (void *)obj_locator); - // } - // else if (index_type == DART_HASH) { - delete_index_for_attr_name(attr_key, attr_value, (void *)data); - // } - - timer_pause(&timer); - // if (DART_SERVER_DEBUG) { - // printf("[Server_Side_Delete_%d] Timer to delete a keyword %s : %s from index = %.4f - // microseconds\n", - // pdc_server_rank_g, attr_key, attr_value, timer_delta_us(&timer)); - // } - indexed_word_count_g--; - ret_value = SUCCEED; - return ret_value; + midx_num_server_g = num_server; + midx_server_id_g = server_id; + idioms_g = IDIOMS_init(server_id, num_server); } /****************************/ @@ -449,256 +32,34 @@ PDC_Server_dart_get_server_info(dart_get_server_info_in_t *in, dart_get_server_i { perr_t ret_value = SUCCEED; FUNC_ENTER(NULL); - uint32_t serverId = in->serverId; - out->indexed_word_count = 
indexed_word_count_g; - out->request_count = server_request_count_g; - FUNC_LEAVE(ret_value); -} - -/** - * The callback function performs on each prefix on secondary art_tree - * - * - */ -int -level_two_art_callback(void *data, const unsigned char *key, uint32_t key_len, void *value) -{ - pdc_art_iterator_param_t *param = (pdc_art_iterator_param_t *)(data); - // println("Level two start"); - if (param->level_two_infix != NULL) { - if (contains((const char *)key, (const char *)param->level_two_infix) == 0) { - return 0; - } - } - if (value != NULL) { - Set * obj_id_set = (Set *)value; - SetIterator value_set_iter; - set_iterate(obj_id_set, &value_set_iter); - - while (set_iter_has_more(&value_set_iter)) { - uint64_t *item = (uint64_t *)set_iter_next(&value_set_iter); - uint64_t *itemValue = (uint64_t *)calloc(1, sizeof(uint64_t)); - *itemValue = *item; - set_insert(param->out, itemValue); - } - } - // println("Level two finish"); - return 0; -} - -/** - * The callback function performs on each prefix on a art_tree. - * - */ -int -level_one_art_callback(void *data, const unsigned char *key, uint32_t key_len, void *value) -{ - key_index_leaf_content * leafcnt = (key_index_leaf_content *)value; - pdc_art_iterator_param_t *param = (pdc_art_iterator_param_t *)(data); - - if (param->level_one_infix != NULL) { - if (contains((char *)key, param->level_one_infix) == 0) { - return 0; - } - } - - char *secondary_query = param->query_str; - // param->total_count = 0; - // param->out = NULL; - if (strchr(secondary_query, '~')) { - // TODO: DO RANGE QUERY HERE. currently no solution for range query. - // - } - else { - // DO TEXT QUERY HERE. 
- pattern_type_t level_two_ptn_type = determine_pattern_type(secondary_query); - char * tok = NULL; - switch (level_two_ptn_type) { - case PATTERN_EXACT: - tok = secondary_query; - if (leafcnt->extra_prefix_index != NULL) { - Set *obj_id_set = - (Set *)art_search(leafcnt->extra_prefix_index, (unsigned char *)tok, strlen(tok)); - if (obj_id_set != NULL) { - level_two_art_callback((void *)param, (unsigned char *)tok, strlen(tok), - (void *)obj_id_set); - } - } - break; - case PATTERN_PREFIX: - tok = subrstr(secondary_query, strlen(secondary_query) - 1); - if (leafcnt->extra_prefix_index != NULL) { - art_iter_prefix((art_tree *)leafcnt->extra_prefix_index, (unsigned char *)tok, - strlen(tok), level_two_art_callback, param); - } - break; - case PATTERN_SUFFIX: - tok = substr(secondary_query, 1); - art_tree *secondary_trie = NULL; -#ifndef PDC_DART_SFX_TREE - tok = reverse_str(tok); - secondary_trie = (art_tree *)leafcnt->extra_suffix_index; -#else - secondary_trie = (art_tree *)leafcnt->extra_prefix_index; -#endif - if (secondary_trie != NULL) { -#ifndef PDC_DART_SFX_TREE - art_iter_prefix(secondary_trie, (unsigned char *)tok, strlen(tok), level_two_art_callback, - param); -#else - Set *obj_id_set = (Set *)art_search(secondary_trie, (unsigned char *)tok, strlen(tok)); - if (obj_id_set != NULL) { - level_two_art_callback((void *)param, (unsigned char *)tok, strlen(tok), - (void *)obj_id_set); - } -#endif - } - break; - case PATTERN_MIDDLE: - tok = substring(secondary_query, 1, strlen(secondary_query) - 1); - secondary_trie = (art_tree *)leafcnt->extra_prefix_index; - if (secondary_trie != NULL) { -#ifndef PDC_DART_SFX_TREE - param->level_two_infix = tok; - art_iter(secondary_trie, level_two_art_callback, param); -#else - art_iter_prefix(secondary_trie, (unsigned char *)tok, strlen(tok), level_two_art_callback, - param); -#endif - } - break; - default: - break; - } - } - return 0; -} - -perr_t -metadata_index_search(char *query, int index_type, uint64_t *n_obj_ids_ptr, 
uint64_t **buf_ptrs) -{ - - perr_t result = SUCCEED; - stopwatch_t index_timer; - - char *kdelim_ptr = strchr(query, (int)'='); + uint32_t serverId = in->serverId; - char *k_query = get_key(query, '='); - char *v_query = get_value(query, '='); + out->indexed_word_count = idioms_g->index_record_count_g; + out->request_count = idioms_g->search_request_count_g; - if (DART_SERVER_DEBUG) { - println("[Server_Side_Query_%d] k_query = '%s' | v_query = '%s' ", pdc_server_rank_g, k_query, - v_query); - } - - pdc_art_iterator_param_t *param = (pdc_art_iterator_param_t *)calloc(1, sizeof(pdc_art_iterator_param_t)); - param->level_one_infix = NULL; - param->level_two_infix = NULL; - param->query_str = v_query; - param->out = set_new(ui64_hash, ui64_equal); - set_register_free_function(param->out, free); - - timer_start(&index_timer); - - char *qType_string = "Exact"; - - if (NULL == kdelim_ptr) { - if (DART_SERVER_DEBUG) { - println("[Server_Side_Query_%d]query string '%s' is not valid.", pdc_server_rank_g, query); - } - *n_obj_ids_ptr = 0; - return result; - } - else { - char *tok; - // println("k_query %s, v_query %s", k_query, v_query); - pattern_type_t level_one_ptn_type = determine_pattern_type(k_query); - key_index_leaf_content *leafcnt = NULL; - // if (index_type == DHT_FULL_HASH || index_type == DHT_INITIAL_HASH) { - // search_through_hash_table(k_query, index_type, level_one_ptn_type, param); - // } - // else { - switch (level_one_ptn_type) { - case PATTERN_EXACT: - qType_string = "Exact"; - tok = k_query; - leafcnt = (key_index_leaf_content *)art_search(art_key_prefix_tree_g, (unsigned char *)tok, - strlen(tok)); - if (leafcnt != NULL) { - level_one_art_callback((void *)param, (unsigned char *)tok, strlen(tok), (void *)leafcnt); - } - break; - case PATTERN_PREFIX: - qType_string = "Prefix"; - tok = subrstr(k_query, strlen(k_query) - 1); - art_iter_prefix((art_tree *)art_key_prefix_tree_g, (unsigned char *)tok, strlen(tok), - level_one_art_callback, param); - break; - 
case PATTERN_SUFFIX: - qType_string = "Suffix"; - tok = substr(k_query, 1); -#ifndef PDC_DART_SFX_TREE - tok = reverse_str(tok); - art_iter_prefix((art_tree *)art_key_suffix_tree_g, (unsigned char *)tok, strlen(tok), - level_one_art_callback, param); -#else - leafcnt = (key_index_leaf_content *)art_search(art_key_prefix_tree_g, (unsigned char *)tok, - strlen(tok)); - if (leafcnt != NULL) { - level_one_art_callback((void *)param, (unsigned char *)tok, strlen(tok), (void *)leafcnt); - } -#endif - break; - case PATTERN_MIDDLE: - qType_string = "Infix"; - tok = substring(k_query, 1, strlen(k_query) - 1); -#ifndef PDC_DART_SFX_TREE - param->level_one_infix = tok; - art_iter(art_key_prefix_tree_g, level_one_art_callback, param); -#else - art_iter_prefix(art_key_prefix_tree_g, (unsigned char *)tok, strlen(tok), - level_one_art_callback, param); -#endif - break; - default: - break; - } - // } - } - - uint32_t i = 0; - - *n_obj_ids_ptr = set_num_entries(param->out); - *buf_ptrs = (uint64_t *)calloc(*n_obj_ids_ptr, sizeof(uint64_t)); - - SetIterator iter; - set_iterate(param->out, &iter); - while (set_iter_has_more(&iter)) { - uint64_t *item = (uint64_t *)set_iter_next(&iter); - (*buf_ptrs)[i] = *item; - i++; - } - set_free(param->out); - - timer_pause(&index_timer); - if (DART_SERVER_DEBUG) { - printf("[Server_Side_%s_%d] Time to address query '%s' and get %d results = %.4f microseconds\n", - qType_string, pdc_server_rank_g, query, *n_obj_ids_ptr, timer_delta_us(&index_timer)); - } - server_request_count_g++; - return result; + FUNC_LEAVE(ret_value); } perr_t PDC_Server_dart_perform_one_server(dart_perform_one_server_in_t *in, dart_perform_one_server_out_t *out, uint64_t *n_obj_ids_ptr, uint64_t **buf_ptrs) { - perr_t result = SUCCEED; - dart_op_type_t op_type = in->op_type; - dart_hash_algo_t hash_algo = in->hash_algo; - char * attr_key = (char *)in->attr_key; - char * attr_val = (char *)in->attr_val; - dart_object_ref_type_t ref_type = in->obj_ref_type; + perr_t result = 
SUCCEED; + dart_op_type_t op_type = in->op_type; + dart_hash_algo_t hash_algo = in->hash_algo; + char * attr_key = (char *)in->attr_key; + void * attr_val = in->attr_val; + uint32_t attr_vsize = in->attr_vsize; + pdc_c_var_type_t attr_dtype = in->attr_vtype; + dart_object_ref_type_t ref_type = in->obj_ref_type; + + IDIOMS_md_idx_record_t *idx_record = (IDIOMS_md_idx_record_t *)calloc(1, sizeof(IDIOMS_md_idx_record_t)); + idx_record->key = attr_key; + idx_record->value = attr_val; + idx_record->virtual_node_id = in->vnode_id; + idx_record->type = in->attr_vtype; + idx_record->value_len = in->attr_vsize; + idx_record->src_client_id = in->src_client_id; uint64_t obj_locator = in->obj_primary_ref; if (ref_type == REF_PRIMARY_ID) { @@ -710,21 +71,52 @@ PDC_Server_dart_perform_one_server(dart_perform_one_server_in_t *in, dart_perfor else if (ref_type == REF_SERVER_ID) { obj_locator = in->obj_server_ref; } + + idx_record->obj_ids = (uint64_t *)calloc(1, sizeof(uint64_t)); + idx_record->obj_ids[0] = obj_locator; + idx_record->num_obj_ids = 1; + out->has_bulk = 0; - // printf("Respond to: in->op_type=%d\n", in->op_type ); if (op_type == OP_INSERT) { - metadata_index_create(attr_key, attr_val, obj_locator, hash_algo); + idx_record->is_key_suffix = in->inserting_suffix; + idioms_local_index_create(idioms_g, idx_record); } else if (op_type == OP_DELETE) { - metadata_index_delete(attr_key, attr_val, obj_locator, hash_algo); + idx_record->is_key_suffix = in->inserting_suffix; + idioms_local_index_delete(idioms_g, idx_record); } else { - char *query = (char *)in->attr_key; - result = metadata_index_search(query, hash_algo, n_obj_ids_ptr, buf_ptrs); + // printf("attr_key=%s, attr_val=%s, attr_vsize=%d, attr_dtype=%d\n", attr_key, attr_val, attr_vsize, + // attr_dtype); + idx_record->num_obj_ids = 0; + idioms_local_index_search(idioms_g, idx_record); + *n_obj_ids_ptr = idx_record->num_obj_ids; + *buf_ptrs = idx_record->obj_ids; + out->n_items = (*n_obj_ids_ptr); if 
((*n_obj_ids_ptr) > 0) { out->has_bulk = 1; } } return result; +} + +// ********************* Index Dump ********************* + +perr_t +metadata_index_dump(char *checkpiont_dir, uint32_t serverID) +{ + perr_t ret_value = SUCCEED; + ret_value = idioms_metadata_index_dump(idioms_g, checkpiont_dir, serverID); + return ret_value; +} + +// ********************* Index Recover ********************* + +perr_t +metadata_index_recover(char *checkpiont_dir, int num_server, uint32_t serverID) +{ + perr_t ret_value = SUCCEED; + ret_value = idioms_metadata_index_recover(idioms_g, checkpiont_dir, num_server, serverID); + return ret_value; } \ No newline at end of file diff --git a/src/server/pdc_server_metadata_index_test.c b/src/server/pdc_server_metadata_index_test.c index d385c717f..43b659225 100644 --- a/src/server/pdc_server_metadata_index_test.c +++ b/src/server/pdc_server_metadata_index_test.c @@ -2,38 +2,157 @@ #include #include #include "pdc_server_metadata_index.h" +#include "idioms_local_index.h" +#include "pdc_logger.h" +#include "dart_core.h" void -insert_kv_to_index(char *key, char *value, uint64_t obj_id) +delete_kv_from_index(char *kv, uint64_t obj_id) { + char * key = NULL; + char * value = NULL; + int8_t kv_dtype = PDC_STRING; + dart_perform_one_server_in_t input; dart_perform_one_server_out_t output; + if (kv && contains(kv, "=")) { + key = substring(kv, 0, indexOf(kv, '=')); + value = substring(kv, indexOf(kv, '=') + 1, strlen(kv)); + if (is_number_query(value)) { + if (contains(value, ".")) { + kv_dtype = PDC_DOUBLE; + } + else { + kv_dtype = PDC_INT64; + } + get_number_from_string(value, kv_dtype, &(input.attr_val)); + input.attr_vsize = get_size_by_class_n_type(input.attr_val, 1, PDC_CLS_ITEM, kv_dtype); + } + else { + input.attr_val = stripQuotes(value); + input.attr_vsize = strlen(input.attr_val) + 1; + } + } + else { + LOG_ERROR("Invalid Key Value Pair!\n"); + return; + } + input.obj_ref_type = REF_PRIMARY_ID; input.hash_algo = DART_HASH; - // Test 
Insert Index - input.op_type = OP_INSERT; - input.attr_val = value; + // Test delete Index + input.op_type = OP_DELETE; input.obj_primary_ref = obj_id; + input.attr_vtype = kv_dtype; + +#ifndef PDC_DART_SFX_TREE + input.inserting_suffix = 0; + input.attr_key = strdup(key); + assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); + + input.inserting_suffix = 1; + input.attr_key = reverse_str(key); + assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); + +#else for (int i = 0; i < strlen(key); i++) { + if (i == 0) { + input.inserting_suffix = 0; + } + else { + input.inserting_suffix = 1; + } input.attr_key = substring(key, i, strlen(key)); assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); - // printf("Index Insertion Successful!\n"); } + +#endif } void -query_result_from_kvtag(char *key_value_query, int8_t op_type) +insert_kv_to_index(char *kv, uint64_t obj_id) { + + char * key = NULL; + void * value = NULL; + int8_t kv_dtype = PDC_STRING; + dart_perform_one_server_in_t input; dart_perform_one_server_out_t output; - uint64_t n_obj_ids = 0; - uint64_t * buf_ptr = NULL; - input.op_type = op_type; - input.attr_key = key_value_query; - assert(PDC_Server_dart_perform_one_server(&input, &output, &n_obj_ids, &buf_ptr) == SUCCEED); - printf("Query Successful! 
%d Results: ", n_obj_ids); + + LOG_DEBUG("Inserting %s\n", kv); + if (kv && contains(kv, "=")) { + key = substring(kv, 0, indexOf(kv, '=')); + value = substring(kv, indexOf(kv, '=') + 1, strlen(kv)); + if (is_number_query(value)) { + if (contains(value, ".")) { + kv_dtype = PDC_DOUBLE; + } + else { + kv_dtype = PDC_INT64; + } + get_number_from_string(value, kv_dtype, &(input.attr_val)); + input.attr_vsize = get_size_by_class_n_type(input.attr_val, 1, PDC_CLS_ITEM, kv_dtype); + } + else { + input.attr_val = stripQuotes(value); + input.attr_vsize = strlen(input.attr_val) + 1; + } + } + else { + LOG_ERROR("Invalid Key Value Pair!\n"); + return; + } + + input.obj_ref_type = REF_PRIMARY_ID; + input.hash_algo = DART_HASH; + // Test Insert Index + input.op_type = OP_INSERT; + + input.obj_primary_ref = obj_id; + input.attr_vtype = kv_dtype; + +#ifndef PDC_DART_SFX_TREE + input.inserting_suffix = 0; + input.attr_key = strdup(key); + assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); + + input.inserting_suffix = 1; + input.attr_key = reverse_str(key); + assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); + +#else + + for (int i = 0; i < strlen(key); i++) { + if (i == 0) { + input.inserting_suffix = 0; + } + else { + input.inserting_suffix = 1; + } + input.attr_key = substring(key, i, strlen(key)); + input.vnode_id = 1; + assert(PDC_Server_dart_perform_one_server(&input, &output, NULL, NULL) == SUCCEED); + } + +#endif +} + +void +query_result_from_kvtag(char *key_value_query, int8_t op_type) +{ + dart_perform_one_server_in_t *input = + (dart_perform_one_server_in_t *)calloc(1, sizeof(dart_perform_one_server_in_t)); + dart_perform_one_server_out_t *output = + (dart_perform_one_server_out_t *)calloc(1, sizeof(dart_perform_one_server_out_t)); + uint64_t n_obj_ids = 0; + uint64_t *buf_ptr = NULL; + input->op_type = op_type; + input->attr_key = key_value_query; + assert(PDC_Server_dart_perform_one_server(input, 
output, &n_obj_ids, &buf_ptr) == SUCCEED); + NLF_LOG_INFO("Query %s Successful! %d Results: ", key_value_query, n_obj_ids); for (int i = 0; i < n_obj_ids; i++) { printf("%llu, ", buf_ptr[i]); } @@ -44,31 +163,147 @@ void test_PDC_Server_dart_perform_one_server() { - PDC_Server_dart_init(); + PDC_Server_metadata_index_init(1, 0); - char *key = (char *)calloc(100, sizeof(char)); - char *val = (char *)calloc(100, sizeof(char)); + char *kv = (char *)calloc(20, sizeof(char)); + char *numkv = (char *)calloc(20, sizeof(char)); - for (int i = 0; i < 100; i++) { - sprintf(key, "key%03dkey", i); - sprintf(val, "val%03dval", i); - printf("Inserting %s, %s\n", key, val); - insert_kv_to_index(key, val, 10000 + i); + for (int i = 0; i < 1000; i++) { + sprintf(kv, "key%03dkey=\"val%03dval\"", i, i); + sprintf(numkv, "num%03dnum=%d", i, i); + insert_kv_to_index(kv, 10000 + i); + sprintf(numkv, "num%03dnum=%d", i, i); + insert_kv_to_index(numkv, 10000 + i); } - insert_kv_to_index("0key", "0val", 20000); - insert_kv_to_index("key000key", "val000val", 30000); + insert_kv_to_index("0key=\"0val\"", 20000); + insert_kv_to_index("key000key=\"val000val\"", 10000); + insert_kv_to_index("key000key=\"val000val\"", 20000); + insert_kv_to_index("key000key=\"val000val\"", 30000); + insert_kv_to_index("key433key=\"val433val\"", 30000); + + insert_kv_to_index("0num=0", 20000); + insert_kv_to_index("num000num=0", 10000); + insert_kv_to_index("num010num=2", 20000); + insert_kv_to_index("num010num=3", 30000); + insert_kv_to_index("num010num=5", 50000); + insert_kv_to_index("num010num=6", 60000); + insert_kv_to_index("num010num=7", 70000); + insert_kv_to_index("num010num=9", 90000); + + insert_kv_to_index("num001num=0", 11000); + insert_kv_to_index("num011num=2", 21000); + insert_kv_to_index("num011num=3", 31000); + insert_kv_to_index("num011num=5", 51000); + insert_kv_to_index("num011num=6", 61000); + insert_kv_to_index("num011num=7", 71000); + insert_kv_to_index("num011num=9", 91000); + + 
insert_kv_to_index("num000num=0", 30000); + insert_kv_to_index("num433num=433", 30000); + + LOG_INFO("Index Insertion Successful!\n"); + + // key000key val000val + query_result_from_kvtag("key000key=\"val000val\"", OP_EXACT_QUERY); + query_result_from_kvtag("0key=\"0val\"", OP_EXACT_QUERY); + query_result_from_kvtag("key01*=\"val01*\"", OP_PREFIX_QUERY); + query_result_from_kvtag("*33key=\"*33val\"", OP_SUFFIX_QUERY); + query_result_from_kvtag("*43*=\"*43*\"", OP_INFIX_QUERY); + + query_result_from_kvtag("num01*=5~", OP_RANGE_QUERY); + query_result_from_kvtag("num000num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=~5", OP_RANGE_QUERY); + query_result_from_kvtag("0num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=5~9", OP_RANGE_QUERY); + query_result_from_kvtag("num01*=5|~|9", OP_RANGE_QUERY); + + LOG_INFO("Index Dumping...\n"); + // save the index to file + metadata_index_dump("/workspaces/pdc/build/bin", 0); + + for (int i = 0; i < 1000; i++) { + sprintf(kv, "key%03dkey=\"val%03dval\"", i, i); + // LOG_DEBUG("Deleting %s\n", kv); + delete_kv_from_index(kv, 10000 + i); + sprintf(numkv, "num%03dnum=%d", i, i); + delete_kv_from_index(numkv, 10000 + i); + } - query_result_from_kvtag("key000key=val000val", OP_EXACT_QUERY); - query_result_from_kvtag("0key=0val", OP_EXACT_QUERY); - query_result_from_kvtag("key01*=val01*", OP_PREFIX_QUERY); - query_result_from_kvtag("*3key=*3val", OP_SUFFIX_QUERY); - query_result_from_kvtag("*9*=*9*", OP_INFIX_QUERY); + delete_kv_from_index("0key=\"0val\"", 20000); + delete_kv_from_index("key000key=\"val000val\"", 10000); + delete_kv_from_index("key000key=\"val000val\"", 20000); + delete_kv_from_index("key000key=\"val000val\"", 30000); + delete_kv_from_index("key433key=\"val433val\"", 30000); + + delete_kv_from_index("0num=0", 20000); + delete_kv_from_index("num000num=0", 10000); + delete_kv_from_index("num010num=2", 20000); + delete_kv_from_index("num010num=3", 30000); + delete_kv_from_index("num010num=5", 50000); + 
delete_kv_from_index("num010num=6", 60000); + delete_kv_from_index("num010num=7", 70000); + delete_kv_from_index("num010num=9", 90000); + + delete_kv_from_index("num001num=0", 11000); + delete_kv_from_index("num011num=2", 21000); + delete_kv_from_index("num011num=3", 31000); + delete_kv_from_index("num011num=5", 51000); + delete_kv_from_index("num011num=6", 61000); + delete_kv_from_index("num011num=7", 71000); + delete_kv_from_index("num011num=9", 91000); + + delete_kv_from_index("num000num=0", 30000); + delete_kv_from_index("num433num=433", 30000); + + LOG_INFO("Index Deletion Successful!\n"); + + query_result_from_kvtag("key000key=\"val000val\"", OP_EXACT_QUERY); + query_result_from_kvtag("0key=\"0val\"", OP_EXACT_QUERY); + query_result_from_kvtag("key01*=\"val01*\"", OP_PREFIX_QUERY); + query_result_from_kvtag("*33key=\"*l\"", OP_SUFFIX_QUERY); + query_result_from_kvtag("*43*=\"*43*\"", OP_INFIX_QUERY); + + query_result_from_kvtag("num01*=5~", OP_RANGE_QUERY); + query_result_from_kvtag("num000num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=~5", OP_RANGE_QUERY); + query_result_from_kvtag("0num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=5~9", OP_RANGE_QUERY); + query_result_from_kvtag("num01*=5|~|9", OP_RANGE_QUERY); + + metadata_index_recover("/workspaces/pdc/build/bin", 1, 0); + + LOG_INFO("Index Recovery Done!\n"); + + // key000key val000val + query_result_from_kvtag("key000key=\"val000val\"", OP_EXACT_QUERY); + query_result_from_kvtag("0key=\"0val\"", OP_EXACT_QUERY); + query_result_from_kvtag("key01*=\"val01*\"", OP_PREFIX_QUERY); + query_result_from_kvtag("*33key=\"*33val\"", OP_SUFFIX_QUERY); + query_result_from_kvtag("*43*=\"*43*\"", OP_INFIX_QUERY); + + query_result_from_kvtag("num01*=5~", OP_RANGE_QUERY); + query_result_from_kvtag("num000num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=~5", OP_RANGE_QUERY); + query_result_from_kvtag("0num=0", OP_EXACT_QUERY); + query_result_from_kvtag("num01*=5~9", OP_RANGE_QUERY); + 
query_result_from_kvtag("num01*=5|~|9", OP_RANGE_QUERY); +} +int +init_default_logger() +{ + setLogFile(LOG_LEVEL_ERROR, "stderr"); + setLogFile(LOG_LEVEL_WARNING, "stdout"); + setLogFile(LOG_LEVEL_INFO, "stdout"); + setLogFile(LOG_LEVEL_DEBUG, "stdout"); + setLogLevel(LOG_LEVEL_DEBUG); + return 0; } int main() { + init_default_logger(); test_PDC_Server_dart_perform_one_server(); return 0; } \ No newline at end of file diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 30667ddcd..2c0987c86 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -153,9 +153,12 @@ endforeach(program) if(BUILD_MPI_TESTING) set(MPI_PROGRAMS kvtag_query_scale_col + kvtag_range_query_scale + kvtag_affix_query_scale # kvtag_query_mpi kvtag_add_get_benchmark kvtag_add_get_scale + llsm_idioms_bench ) foreach(program ${MPI_PROGRAMS}) diff --git a/src/tests/dart_algo_sim.c b/src/tests/dart_algo_sim.c index 5b216dd8e..838e595a1 100644 --- a/src/tests/dart_algo_sim.c +++ b/src/tests/dart_algo_sim.c @@ -437,28 +437,24 @@ main(int argc, char **argv) if (INPUT_TYPE == INPUT_DICTIONARY) { // Init dart space. 
alphabet_size = 29; - dart_space_init(&dart_g, num_server, num_server, alphabet_size, extra_tree_height, - replication_factor); + __dart_space_init(&dart_g, num_server, alphabet_size, extra_tree_height, replication_factor, 1024); read_words_from_text(txtFilePath, &word_count, &req_count, prefix_len, keyword_insert[hashalgo], keyword_search[hashalgo]); } else if (INPUT_TYPE == INPUT_RANDOM_STRING) { alphabet_size = 129; - dart_space_init(&dart_g, num_server, num_server, alphabet_size, extra_tree_height, - replication_factor); + __dart_space_init(&dart_g, num_server, alphabet_size, extra_tree_height, replication_factor, 1024); gen_random_strings_with_cb(word_count, 6, 16, alphabet_size, prefix_len, keyword_insert[hashalgo], keyword_search[hashalgo]); } else if (INPUT_TYPE == INPUT_UUID) { alphabet_size = 37; - dart_space_init(&dart_g, num_server, num_server, alphabet_size, extra_tree_height, - replication_factor); + __dart_space_init(&dart_g, num_server, alphabet_size, extra_tree_height, replication_factor, 1024); gen_uuids(word_count, prefix_len, keyword_insert[hashalgo], keyword_search[hashalgo]); } else if (INPUT_TYPE == INPUT_WIKI_KEYWORD) { alphabet_size = 129; - dart_space_init(&dart_g, num_server, num_server, alphabet_size, extra_tree_height, - replication_factor); + __dart_space_init(&dart_g, num_server, alphabet_size, extra_tree_height, replication_factor, 1024); read_words_from_text(txtFilePath, &word_count, &req_count, prefix_len, keyword_insert[hashalgo], keyword_search[hashalgo]); } diff --git a/src/tests/dart_attr_dist_test.c b/src/tests/dart_attr_dist_test.c index f333d76f2..d1a6d022d 100644 --- a/src/tests/dart_attr_dist_test.c +++ b/src/tests/dart_attr_dist_test.c @@ -259,7 +259,8 @@ main(int argc, char *argv[]) if (j % size == rank) { // insert object reference into DART timer_start(&timer_dart); - PDC_Client_insert_obj_ref_into_dart(hash_algo, key, value, ref_type, j); + PDC_Client_insert_obj_ref_into_dart(hash_algo, key, value, strlen(value), 
PDC_STRING, + ref_type, j); timer_pause(&timer_dart); duration_dart_ms += (double)timer_delta_ms(&timer_dart); } diff --git a/src/tests/dart_func_test.c b/src/tests/dart_func_test.c index 8d29379b8..14d829b0f 100644 --- a/src/tests/dart_func_test.c +++ b/src/tests/dart_func_test.c @@ -54,7 +54,7 @@ main(int argc, char **argv) char * value = "1234"; uint64_t data = 12341234; // if (rank == 0) { - PDC_Client_insert_obj_ref_into_dart(hash_algo, key, value, ref_type, data); + PDC_Client_insert_obj_ref_into_dart(hash_algo, key, value, strlen(value), PDC_STRING, ref_type, data); println("[Client_Side_Insert] Insert '%s=%s' for ref %llu", key, value, data); // This is for testing exact search diff --git a/src/tests/dart_test.c b/src/tests/dart_test.c index 25a5e3da0..b6e5515fc 100644 --- a/src/tests/dart_test.c +++ b/src/tests/dart_test.c @@ -216,13 +216,15 @@ main(int argc, char **argv) for (i = 0; i < word_count; i++) { int data = i; - PDC_Client_insert_obj_ref_into_dart(index_type, input_word_list[i], input_word_list[i], ref_type, + PDC_Client_insert_obj_ref_into_dart(index_type, input_word_list[i], input_word_list[i], + strlen(input_word_list[i]), PDC_STRING, ref_type, (uint64_t)data); } for (i = 0; i < word_count; i++) { int data = i; - PDC_Client_delete_obj_ref_from_dart(index_type, input_word_list[i], input_word_list[i], ref_type, + PDC_Client_delete_obj_ref_from_dart(index_type, input_word_list[i], input_word_list[i], + strlen(input_word_list[i]), PDC_STRING, ref_type, (uint64_t)data); } @@ -235,7 +237,8 @@ main(int argc, char **argv) for (i = 0; i < word_count; i++) { timer_start(&detailed_timer); int data = i; - PDC_Client_insert_obj_ref_into_dart(index_type, input_word_list[i], input_word_list[i], ref_type, + PDC_Client_insert_obj_ref_into_dart(index_type, input_word_list[i], input_word_list[i], + strlen(input_word_list[i]), PDC_STRING, ref_type, (uint64_t)data); timer_pause(&detailed_timer); if (round == 1) @@ -547,7 +550,8 @@ main(int argc, char **argv) for 
(i = 0; i < word_count; i++) { timer_start(&detailed_timer); int data = i; - PDC_Client_delete_obj_ref_from_dart(hash_algo, input_word_list[i], input_word_list[i], ref_type, + PDC_Client_delete_obj_ref_from_dart(hash_algo, input_word_list[i], input_word_list[i], + strlen(input_word_list[i]), PDC_STRING, ref_type, (uint64_t)data); timer_pause(&detailed_timer); if (round == 1) diff --git a/src/tests/kvtag_query_scale_col.c b/src/tests/kvtag_affix_query_scale.c similarity index 82% rename from src/tests/kvtag_query_scale_col.c rename to src/tests/kvtag_affix_query_scale.c index a2a4b8405..cbb9f673c 100644 --- a/src/tests/kvtag_query_scale_col.c +++ b/src/tests/kvtag_affix_query_scale.c @@ -74,7 +74,7 @@ print_usage(char *name) "one query against one tag\n"); printf(" n_selectivity: selectivity, on a 100 scale. \n"); printf(" is_using_dart: 1 for using dart, 0 for not using dart\n"); - printf(" query_type: 0 for exact, 1 for prefix, 2 for suffix, 3 for infix\n"); + printf(" query_type: -1 for no query, 0 for exact, 1 for prefix, 2 for suffix, 3 for infix\n"); printf(" comm_type: 0 for point-to-point, 1 for collective\n"); } @@ -163,6 +163,7 @@ main(int argc, char *argv[]) // 4 for num_exact, 5 for num_range comm_type = atoi(argv[6]); // 0 for point-to-point, 1 for collective + int bypass_query = query_type == -1 ? 1 : 0; // prepare container if (prepare_container(&pdc, &cont_prop, &cont, &obj_prop, my_rank) < 0) { println("fail to prepare container @ line %d", __LINE__); @@ -187,9 +188,10 @@ main(int argc, char *argv[]) my_cnt_round = (int *)calloc(round, sizeof(int)); total_cnt_round = (int *)calloc(round, sizeof(int)); +#ifdef ENABLE_MPI MPI_Barrier(MPI_COMM_WORLD); stime = MPI_Wtime(); - +#endif // This is for adding #rounds tags to the objects. // Each rank will add #rounds tags to #my_obj objects. 
// With the selectivity, we should be able to control how many objects will be attached with the #round @@ -201,13 +203,17 @@ main(int argc, char *argv[]) char tag_value[64]; snprintf(attr_name, 63, "%03d%03dattr_name%03d%03d", iter, iter, iter, iter); snprintf(tag_value, 63, "%03d%03dtag_value%03d%03d", iter, iter, iter, iter); - kvtag.name = strdup(attr_name); - kvtag.value = (void *)strdup(tag_value); - kvtag.type = PDC_STRING; - kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + kvtag.name = strdup(attr_name); + kvtag.value = (void *)strdup(tag_value); + kvtag.type = PDC_STRING; + kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + pdcid_t meta_id = PDC_obj_get_info(obj_ids[i])->obj_info_pub->meta_id; if (is_using_dart) { - if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, kvtag.value, ref_type, - (uint64_t)obj_ids[i]) < 0) { + if (PDCobj_put_tag(obj_ids[i], kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { + printf("fail to add a kvtag to o%d\n", i + my_obj_s); + } + if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, kvtag.value, kvtag.size, + kvtag.type, ref_type, meta_id) < 0) { printf("fail to add a kvtag to o%d\n", i + my_obj_s); } } @@ -218,17 +224,21 @@ main(int argc, char *argv[]) } free(kvtag.name); free(kvtag.value); - my_cnt_round[iter]++; + // TODO: this is for checking the correctness of the query results. + // my_cnt_round[iter]++; } - if (my_rank == 0 && n_obj > 1000) { + // TODO: why n_obj has to be larger than 1000? 
+ if (my_rank == 0 /*&& n_obj > 1000 */) { println("Rank %d: Added %d kvtag to the %d / %d th object, I'm applying selectivity %d to %d " "objects.\n", my_rank, round, i + 1, my_obj_after_selectivity, selectivity, my_obj); } } +#ifdef ENABLE_MPI MPI_Barrier(MPI_COMM_WORLD); total_time = MPI_Wtime() - stime; +#endif if (my_rank == 0) { println("[TAG Creation] Rank %d: Added %d kvtag to %d objects, time: %.5f ms", my_rank, round, my_obj, @@ -236,22 +246,34 @@ main(int argc, char *argv[]) } #ifdef ENABLE_MPI - for (i = 0; i < round; i++) - MPI_Allreduce(&my_cnt_round[i], &total_cnt_round[i], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + // TODO: This is for checking the correctness of the query results. + // for (i = 0; i < round; i++) + // MPI_Allreduce(&my_cnt_round[i], &total_cnt_round[i], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); #endif + if (bypass_query) { + if (my_rank == 0) { + println("Rank %d: All queries are bypassed.", my_rank); + report_avg_server_profiling_rst(); + } + goto done; + } + // For the queries, we issue #round queries. // The selectivity of each exact query should be #selectivity / 100 * #n_obj. // Namely, if you have 1M objects, selectivity is 10, then each query should return 100K objects. + int iter_round = round; + if (comm_type == 0 && is_using_dart == 0) { + iter_round = 2; + } + for (comm_type = 1; comm_type >= 0; comm_type--) { for (query_type = 0; query_type < 4; query_type++) { perr_t ret_value; - if (comm_type == 0 && is_using_dart == 0) - round = 2; - int round_total = 0; - for (iter = -1; iter < round; iter++) { // -1 is for warm up + int round_total = 0; + for (iter = -1; iter < iter_round; iter++) { // -1 is for warm up #ifdef ENABLE_MPI if (iter == 0) { MPI_Barrier(MPI_COMM_WORLD); @@ -280,31 +302,42 @@ main(int argc, char *argv[]) pdc_ids = NULL; if (is_using_dart) { char *query_string = gen_query_str(&output); - ret_value = (comm_type == 0) +#ifdef ENABLE_MPI + ret_value = (comm_type == 0) ? 
PDC_Client_search_obj_ref_through_dart(hash_algo, query_string, ref_type, &nres, &pdc_ids) : PDC_Client_search_obj_ref_through_dart_mpi( hash_algo, query_string, ref_type, &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = PDC_Client_search_obj_ref_through_dart(hash_algo, query_string, ref_type, + &nres, &pdc_ids); +#endif } else { kvtag.name = output.key_query; kvtag.value = output.value_query; - /* fprintf(stderr, " Rank %d: key [%s] value [%s]\n", my_rank, kvtag.name, - * kvtag.value); */ + kvtag.size = (strlen(kvtag.value) + 1) * sizeof(char); + +#ifdef ENABLE_MPI ret_value = (comm_type == 0) ? PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids) : PDC_Client_query_kvtag_mpi(&kvtag, &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids); +#endif } if (ret_value < 0) { printf("fail to query kvtag [%s] with rank %d\n", kvtag.name, my_rank); break; } - if (iter >= 0) { - if (nres != total_cnt_round[iter]) - printf("Rank %d: query %d, comm %d, round %d - results %d do not match expected %d\n", - my_rank, query_type, comm_type, iter, nres, total_cnt_round[iter]); - } + // TODO: This is for checking the correctness of the query results. 
+ // if (iter >= 0) { + // if (nres != total_cnt_round[iter]) + // printf("Rank %d: query %d, comm %d, round %d - results %d do not match expected + // %d\n", + // my_rank, query_type, comm_type, iter, nres, total_cnt_round[iter]); + // } round_total += nres; free(kvtag.name); @@ -338,8 +371,11 @@ main(int argc, char *argv[]) } // delete all tags + +#ifdef ENABLE_MPI MPI_Barrier(MPI_COMM_WORLD); stime = MPI_Wtime(); +#endif my_obj_after_selectivity = my_obj * selectivity / 100; for (i = 0; i < my_obj_after_selectivity; i++) { @@ -348,13 +384,14 @@ main(int argc, char *argv[]) char tag_value[64]; snprintf(attr_name, 63, "%03d%03dattr_name%03d%03d", iter, iter, iter, iter); snprintf(tag_value, 63, "%03d%03dtag_value%03d%03d", iter, iter, iter, iter); - kvtag.name = strdup(attr_name); - kvtag.value = (void *)strdup(tag_value); - kvtag.type = PDC_STRING; - kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + kvtag.name = strdup(attr_name); + kvtag.value = (void *)strdup(tag_value); + kvtag.type = PDC_STRING; + kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + pdcid_t meta_id = PDC_obj_get_info(obj_ids[i])->obj_info_pub->meta_id; if (is_using_dart) { - PDC_Client_delete_obj_ref_from_dart(hash_algo, kvtag.name, (char *)kvtag.value, ref_type, - (uint64_t)obj_ids[i]); + PDC_Client_delete_obj_ref_from_dart(hash_algo, kvtag.name, (char *)kvtag.value, kvtag.size, + kvtag.type, ref_type, meta_id); } else { PDCobj_del_tag(obj_ids[i], kvtag.name); @@ -364,13 +401,16 @@ main(int argc, char *argv[]) } } +#ifdef ENABLE_MPI MPI_Barrier(MPI_COMM_WORLD); total_time = MPI_Wtime() - stime; +#endif if (my_rank == 0) { println("[TAG Deletion] Rank %d: Deleted %d kvtag from %d objects, time: %.5f ms", my_rank, round, my_obj, total_time * 1000.0); } +done: // close a container if (PDCcont_close(cont) < 0) { if (my_rank == 0) { @@ -407,7 +447,7 @@ main(int argc, char *argv[]) if (my_rank == 0) printf("fail to close PDC\n"); } -done: + #ifdef ENABLE_MPI MPI_Finalize(); #endif diff 
--git a/src/tests/kvtag_query.c b/src/tests/kvtag_query.c index 3a6038d3f..fb58b2e20 100644 --- a/src/tests/kvtag_query.c +++ b/src/tests/kvtag_query.c @@ -161,8 +161,8 @@ main(int argc, char *argv[]) v = iter; sprintf(value, "%d", v); if (is_using_dart) { - if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, value, ref_type, - (uint64_t)obj_ids[i]) < 0) { + if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, value, strlen(value), + PDC_STRING, ref_type, (uint64_t)obj_ids[i]) < 0) { printf("fail to add a kvtag to o%d\n", i + my_obj_s); } } diff --git a/src/tests/kvtag_query_mpi.c b/src/tests/kvtag_query_mpi.c deleted file mode 100644 index 22c619c97..000000000 --- a/src/tests/kvtag_query_mpi.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright Notice for - * Proactive Data Containers (PDC) Software Library and Utilities - * ----------------------------------------------------------------------------- - - *** Copyright Notice *** - - * Proactive Data Containers (PDC) Copyright (c) 2017, The Regents of the - * University of California, through Lawrence Berkeley National Laboratory, - * UChicago Argonne, LLC, operator of Argonne National Laboratory, and The HDF - * Group (subject to receipt of any required approvals from the U.S. Dept. of - * Energy). All rights reserved. - - * If you have questions about your rights to use or distribute this software, - * please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. - - * NOTICE. This Software was developed under funding from the U.S. Department of - * Energy and the U.S. Government consequently retains certain rights. As such, the - * U.S. Government has been granted for itself and others acting on its behalf a - * paid-up, nonexclusive, irrevocable, worldwide license in the Software to - * reproduce, distribute copies to the public, prepare derivative works, and - * perform publicly and display publicly, and to permit other to do so. 
- */ - -#include -#include -#include -#include -#include -#include "pdc.h" -#include "pdc_client_connect.h" - -int -assign_work_to_rank(int rank, int size, int nwork, int *my_count, int *my_start) -{ - if (rank > size || my_count == NULL || my_start == NULL) { - printf("assign_work_to_rank(): Error with input!\n"); - return -1; - } - if (nwork < size) { - if (rank < nwork) - *my_count = 1; - else - *my_count = 0; - (*my_start) = rank * (*my_count); - } - else { - (*my_count) = nwork / size; - (*my_start) = rank * (*my_count); - - // Last few ranks may have extra work - if (rank >= size - nwork % size) { - (*my_count)++; - (*my_start) += (rank - (size - nwork % size)); - } - } - - return 1; -} - -void -print_usage(char *name) -{ - printf("%s n_obj n_query\n", name); -} - -int -main(int argc, char *argv[]) -{ - pdcid_t pdc, cont_prop, cont, obj_prop; - pdcid_t * obj_ids; - int n_obj, n_add_tag, my_obj, my_obj_s, my_add_tag, my_add_tag_s; - int proc_num, my_rank, i, v, iter, round; - char obj_name[128]; - double stime, total_time; - pdc_kvtag_t kvtag; - uint64_t * pdc_ids; - int nres, ntotal; - -#ifdef ENABLE_MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &proc_num); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); -#endif - - if (argc < 3) { - if (my_rank == 0) - print_usage(argv[0]); - goto done; - } - n_obj = atoi(argv[1]); - round = atoi(argv[2]); - n_add_tag = n_obj / 100; - - // create a pdc - pdc = PDCinit("pdc"); - - // create a container property - cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc); - if (cont_prop <= 0) - printf("Fail to create container property @ line %d!\n", __LINE__); - - // create a container - cont = PDCcont_create("c1", cont_prop); - if (cont <= 0) - printf("Fail to create container @ line %d!\n", __LINE__); - - // create an object property - obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc); - if (obj_prop <= 0) - printf("Fail to create object property @ line %d!\n", __LINE__); - - // Create a number of objects, add at least one 
tag to that object - assign_work_to_rank(my_rank, proc_num, n_obj, &my_obj, &my_obj_s); - if (my_rank == 0) - printf("I will create %d obj\n", my_obj); - obj_ids = (pdcid_t *)calloc(my_obj, sizeof(pdcid_t)); - for (i = 0; i < my_obj; i++) { - sprintf(obj_name, "obj%d", my_obj_s + i); - obj_ids[i] = PDCobj_create(cont, obj_name, obj_prop); - if (obj_ids[i] <= 0) { - printf("Fail to create object @ line %d!\n", __LINE__); - goto done; - } - } - - if (my_rank == 0) - printf("Created %d objects\n", n_obj); - fflush(stdout); - - // Add tags - kvtag.name = "Group"; - kvtag.value = (void *)&v; - kvtag.type = PDC_INT; - kvtag.size = sizeof(int); - - for (iter = 0; iter < round; iter++) { - assign_work_to_rank(my_rank, proc_num, n_add_tag, &my_add_tag, &my_add_tag_s); - - v = iter; - for (i = 0; i < my_add_tag; i++) { - if (PDCobj_put_tag(obj_ids[i], kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { - printf("fail to add a kvtag to o%d\n", i + my_obj_s); - goto done; - } - } - - if (my_rank == 0) - printf("Rank %d: Added a kvtag to %d objects\n", my_rank, my_add_tag); - fflush(stdout); - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - n_add_tag *= 2; - } - - n_add_tag = n_obj / 100; - - for (iter = 0; iter < round; iter++) { - v = iter; - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); - stime = MPI_Wtime(); -#endif - - if (PDC_Client_query_kvtag_mpi(&kvtag, &nres, &pdc_ids, MPI_COMM_WORLD) < 0) { - printf("fail to query kvtag [%s] with rank %d\n", kvtag.name, my_rank); - break; - } - - if (nres != n_add_tag) - printf("Rank %d: query result %d doesn't match expected %d\n", my_rank, nres, n_add_tag); - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); - total_time = MPI_Wtime() - stime; -#endif - if (my_rank == 0) - printf("Total time to query %d objects with tag: %.5e\n", nres, total_time); - fflush(stdout); - n_add_tag *= 2; - } - - // close a container - if (PDCcont_close(cont) < 0) - printf("fail to close container c1\n"); - - // close an object 
property - if (PDCprop_close(obj_prop) < 0) - printf("Fail to close property @ line %d\n", __LINE__); - - // close a container property - if (PDCprop_close(cont_prop) < 0) - printf("Fail to close property @ line %d\n", __LINE__); - - // close pdc - if (PDCclose(pdc) < 0) - printf("fail to close PDC\n"); -done: -#ifdef ENABLE_MPI - MPI_Finalize(); -#endif - - return 0; -} diff --git a/src/tests/kvtag_query_scale.c b/src/tests/kvtag_query_scale.c deleted file mode 100644 index 99336cebc..000000000 --- a/src/tests/kvtag_query_scale.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright Notice for - * Proactive Data Containers (PDC) Software Library and Utilities - * ----------------------------------------------------------------------------- - - *** Copyright Notice *** - - * Proactive Data Containers (PDC) Copyright (c) 2017, The Regents of the - * University of California, through Lawrence Berkeley National Laboratory, - * UChicago Argonne, LLC, operator of Argonne National Laboratory, and The HDF - * Group (subject to receipt of any required approvals from the U.S. Dept. of - * Energy). All rights reserved. - - * If you have questions about your rights to use or distribute this software, - * please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. - - * NOTICE. This Software was developed under funding from the U.S. Department of - * Energy and the U.S. Government consequently retains certain rights. As such, the - * U.S. Government has been granted for itself and others acting on its behalf a - * paid-up, nonexclusive, irrevocable, worldwide license in the Software to - * reproduce, distribute copies to the public, prepare derivative works, and - * perform publicly and display publicly, and to permit other to do so. 
- */ - -#include -#include -#include -#include -#include -#include "pdc.h" -#include "pdc_client_connect.h" - -int -assign_work_to_rank(int rank, int size, int nwork, int *my_count, int *my_start) -{ - if (rank > size || my_count == NULL || my_start == NULL) { - printf("assign_work_to_rank(): Error with input!\n"); - return -1; - } - if (nwork < size) { - if (rank < nwork) - *my_count = 1; - else - *my_count = 0; - (*my_start) = rank * (*my_count); - } - else { - (*my_count) = nwork / size; - (*my_start) = rank * (*my_count); - - // Last few ranks may have extra work - if (rank >= size - nwork % size) { - (*my_count)++; - (*my_start) += (rank - (size - nwork % size)); - } - } - - return 1; -} - -void -print_usage(char *name) -{ - printf("%s n_obj n_round n_selectivity is_using_dart\n", name); - printf("Summary: This test will create n_obj objects, and add n_selectivity tags to each object. Then it " - "will " - "perform n_round point-to-point queries against the tags, each query from each client should get " - "a whole result set.\n"); - printf("Parameters:\n"); - printf(" n_obj: number of objects\n"); - printf(" n_round: number of rounds, it can be the total number of tags too, as each round will perform " - "one query against one tag\n"); - printf(" n_selectivity: selectivity, on a 100 scale. 
\n"); - printf(" is_using_dart: 1 for using dart, 0 for not using dart\n"); -} - -int -main(int argc, char *argv[]) -{ - pdcid_t pdc, cont_prop, cont, obj_prop; - pdcid_t * obj_ids; - int n_obj, n_add_tag, my_obj, my_obj_s, my_add_tag, my_add_tag_s; - int proc_num, my_rank, i, v, iter, round, selectivity, is_using_dart, query_type; - char obj_name[128]; - double stime, total_time; - pdc_kvtag_t kvtag; - uint64_t * pdc_ids; - int nres, ntotal; - -#ifdef ENABLE_MPI - MPI_Init(&argc, &argv); - MPI_Comm_size(MPI_COMM_WORLD, &proc_num); - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); -#endif - - if (argc < 6) { - if (my_rank == 0) - print_usage(argv[0]); - goto done; - } - n_obj = atoi(argv[1]); - round = atoi(argv[2]); - selectivity = atoi(argv[3]); - is_using_dart = atoi(argv[4]); - query_type = atoi(argv[5]); - n_add_tag = n_obj * selectivity / 100; - - // create a pdc - pdc = PDCinit("pdc"); - - // create a container property - cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc); - if (cont_prop <= 0) - printf("Fail to create container property @ line %d!\n", __LINE__); - - // create a container - cont = PDCcont_create("c1", cont_prop); - if (cont <= 0) - printf("Fail to create container @ line %d!\n", __LINE__); - - // create an object property - obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc); - if (obj_prop <= 0) - printf("Fail to create object property @ line %d!\n", __LINE__); - - // Create a number of objects, add at least one tag to that object - assign_work_to_rank(my_rank, proc_num, n_obj, &my_obj, &my_obj_s); - if (my_rank == 0) - printf("I will create %d obj\n", my_obj); - - obj_ids = (pdcid_t *)calloc(my_obj, sizeof(pdcid_t)); - for (i = 0; i < my_obj; i++) { - sprintf(obj_name, "obj%d", my_obj_s + i); - obj_ids[i] = PDCobj_create(cont, obj_name, obj_prop); - if (obj_ids[i] <= 0) - printf("Fail to create object @ line %d!\n", __LINE__); - } - - if (my_rank == 0) - printf("Created %d objects\n", n_obj); - fflush(stdout); - - char *attr_name_per_rank = 
gen_random_strings(1, 6, 8, 26)[0]; - // Add tags - kvtag.name = attr_name_per_rank; - kvtag.value = (void *)&v; - kvtag.type = PDC_INT; - kvtag.size = sizeof(int); - - char key[32]; - char value[32]; - char exact_query[48]; - - dart_object_ref_type_t ref_type = REF_PRIMARY_ID; - dart_hash_algo_t hash_algo = DART_HASH; - - assign_work_to_rank(my_rank, proc_num, n_add_tag, &my_add_tag, &my_add_tag_s); - - // This is for adding #rounds tags to the objects. - for (i = 0; i < my_add_tag; i++) { - for (iter = 0; iter < round; iter++) { - v = iter; - sprintf(value, "%d", v); - if (is_using_dart) { - if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, value, ref_type, - (uint64_t)obj_ids[i]) < 0) { - printf("fail to add a kvtag to o%d\n", i + my_obj_s); - } - } - else { - if (PDCobj_put_tag(obj_ids[i], kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { - printf("fail to add a kvtag to o%d\n", i + my_obj_s); - } - } - } - if (my_rank == 0) - println("Rank %d: Added %d kvtag to the %d th object\n", my_rank, round, i); - } - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); -#endif - - kvtag.name = attr_name_per_rank; - kvtag.value = (void *)&v; - kvtag.type = PDC_INT; - kvtag.size = sizeof(int); - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); - stime = MPI_Wtime(); -#endif - - for (iter = 0; iter < round; iter++) { - v = iter; - if (is_using_dart) { - sprintf(value, "%ld", v); - sprintf(exact_query, "%s=%s", kvtag.name, value); - PDC_Client_search_obj_ref_through_dart(hash_algo, exact_query, ref_type, &nres, &pdc_ids); - } - else { - if (PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids) < 0) { - printf("fail to query kvtag [%s] with rank %d\n", kvtag.name, my_rank); - break; - } - } - } - -#ifdef ENABLE_MPI - MPI_Barrier(MPI_COMM_WORLD); - total_time = MPI_Wtime() - stime; - - if (my_rank == 0) - println("Total time to query %d objects with tag: %.5f", ntotal, total_time); -#endif - // close a container - if (PDCcont_close(cont) < 0) - printf("fail to 
close container c1\n"); - else - printf("successfully close container c1\n"); - - // close an object property - if (PDCprop_close(obj_prop) < 0) - printf("Fail to close property @ line %d\n", __LINE__); - else - printf("successfully close object property\n"); - - // close a container property - if (PDCprop_close(cont_prop) < 0) - printf("Fail to close property @ line %d\n", __LINE__); - else - printf("successfully close container property\n"); - - // close pdc - if (PDCclose(pdc) < 0) - printf("fail to close PDC\n"); -done: -#ifdef ENABLE_MPI - MPI_Finalize(); -#endif - - return 0; -} diff --git a/src/tests/kvtag_range_query_scale.c b/src/tests/kvtag_range_query_scale.c new file mode 100644 index 000000000..a6d3b3975 --- /dev/null +++ b/src/tests/kvtag_range_query_scale.c @@ -0,0 +1,477 @@ +/* + * Copyright Notice for + * Proactive Data Containers (PDC) Software Library and Utilities + * ----------------------------------------------------------------------------- + + *** Copyright Notice *** + + * Proactive Data Containers (PDC) Copyright (c) 2017, The Regents of the + * University of California, through Lawrence Berkeley National Laboratory, + * UChicago Argonne, LLC, operator of Argonne National Laboratory, and The HDF + * Group (subject to receipt of any required approvals from the U.S. Dept. of + * Energy). All rights reserved. + + * If you have questions about your rights to use or distribute this software, + * please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. + + * NOTICE. This Software was developed under funding from the U.S. Department of + * Energy and the U.S. Government consequently retains certain rights. As such, the + * U.S. Government has been granted for itself and others acting on its behalf a + * paid-up, nonexclusive, irrevocable, worldwide license in the Software to + * reproduce, distribute copies to the public, prepare derivative works, and + * perform publicly and display publicly, and to permit other to do so. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "pdc.h" +#include "pdc_client_connect.h" +#include "string_utils.h" + +int +assign_work_to_rank(int rank, int size, int nwork, int *my_count, int *my_start) +{ + if (rank > size || my_count == NULL || my_start == NULL) { + printf("assign_work_to_rank(): Error with input!\n"); + return -1; + } + if (nwork < size) { + if (rank < nwork) + *my_count = 1; + else + *my_count = 0; + (*my_start) = rank * (*my_count); + } + else { + (*my_count) = nwork / size; + (*my_start) = rank * (*my_count); + + // Last few ranks may have extra work + if (rank >= size - nwork % size) { + (*my_count)++; + (*my_start) += (rank - (size - nwork % size)); + } + } + + return 1; +} + +void +print_usage(char *name) +{ + printf("%s n_obj n_round n_selectivity is_using_dart query_type comm_type\n", name); + printf("Summary: This test will create n_obj objects, and add n_selectivity tags to each object. Then it " + "will " + "perform n_round collective queries against the tags, each query from each client should get " + "a whole result set.\n"); + printf("Parameters:\n"); + printf(" n_obj: number of objects\n"); + printf(" n_round: number of rounds, it can be the total number of tags too, as each round will perform " + "one query against one tag\n"); + printf(" n_selectivity: selectivity, on a 100 scale. 
\n"); + printf(" is_using_dart: 1 for using dart, 0 for not using dart\n"); + printf(" query_type: -1 for no query, 0 for exact, 1 for prefix, 2 for suffix, 3 for infix\n"); + printf(" comm_type: 0 for point-to-point, 1 for collective\n"); +} + +perr_t +prepare_container(pdcid_t *pdc, pdcid_t *cont_prop, pdcid_t *cont, pdcid_t *obj_prop, int my_rank) +{ + perr_t ret_value = FAIL; + // create a pdc + *pdc = PDCinit("pdc"); + + // create a container property + *cont_prop = PDCprop_create(PDC_CONT_CREATE, *pdc); + if (*cont_prop <= 0) { + printf("[Client %d] Fail to create container property @ line %d!\n", my_rank, __LINE__); + goto done; + } + // create a container + *cont = PDCcont_create("c1", *cont_prop); + if (*cont <= 0) { + printf("[Client %d] Fail to create container @ line %d!\n", my_rank, __LINE__); + goto done; + } + + // create an object property + *obj_prop = PDCprop_create(PDC_OBJ_CREATE, *pdc); + if (*obj_prop <= 0) { + printf("[Client %d] Fail to create object property @ line %d!\n", my_rank, __LINE__); + goto done; + } + + ret_value = SUCCEED; +done: + return ret_value; +} + +perr_t +creating_objects(pdcid_t **obj_ids, int my_obj, int my_obj_s, pdcid_t cont, pdcid_t obj_prop, int my_rank) +{ + perr_t ret_value = FAIL; + char obj_name[128]; + int64_t timestamp = get_timestamp_ms(); + *obj_ids = (pdcid_t *)calloc(my_obj, sizeof(pdcid_t)); + for (int i = 0; i < my_obj; i++) { + sprintf(obj_name, "obj%" PRId64 "%d", timestamp, my_obj_s + i); + // printf("Rank %d: Creating object %s\n", my_rank, obj_name); + (*obj_ids)[i] = PDCobj_create(cont, obj_name, obj_prop); + if ((*obj_ids)[i] <= 0) { + printf("[Client %d] Fail to create object @ line %d!\n", my_rank, __LINE__); + goto done; + } + } + ret_value = SUCCEED; +done: + return ret_value; +} + +int +main(int argc, char *argv[]) +{ + pdcid_t pdc, cont_prop, cont, obj_prop; + pdcid_t * obj_ids; + int n_obj, my_obj, my_obj_s; + int proc_num, my_rank, i, v, iter, round, selectivity, is_using_dart, query_type, 
comm_type; + double stime, total_time; + pdc_kvtag_t kvtag; + uint64_t * pdc_ids; + int nres, ntotal; + int * my_cnt_round; + int * total_cnt_round; + +#ifdef ENABLE_MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &proc_num); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); +#endif + + if (argc < 7) { + if (my_rank == 0) + print_usage(argv[0]); + goto done; + } + n_obj = atoi(argv[1]); + round = atoi(argv[2]); + selectivity = atoi(argv[3]); + is_using_dart = atoi(argv[4]); // 0 for no index, 1 for using dart. + query_type = atoi(argv[5]); // 0 for exact, 1 for prefix, 2 for suffix, 3 for infix, + // 4 for num_exact, 5 for num_range + comm_type = atoi(argv[6]); // 0 for point-to-point, 1 for collective + + int bypass_query = query_type == -1 ? 1 : 0; + // prepare container + if (prepare_container(&pdc, &cont_prop, &cont, &obj_prop, my_rank) < 0) { + println("fail to prepare container @ line %d", __LINE__); + goto done; + } + // Create a number of objects, add at least one tag to that object + assign_work_to_rank(my_rank, proc_num, n_obj, &my_obj, &my_obj_s); + + if (my_rank == 0) { + println("Each client will create about %d obj", my_obj); + } + + // creating objects + creating_objects(&obj_ids, my_obj, my_obj_s, cont, obj_prop, my_rank); + + if (my_rank == 0) + println("All clients created %d objects", n_obj); + + dart_object_ref_type_t ref_type = REF_PRIMARY_ID; + dart_hash_algo_t hash_algo = DART_HASH; + + my_cnt_round = (int *)calloc(round, sizeof(int)); + total_cnt_round = (int *)calloc(round, sizeof(int)); + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#endif + + int total_insert = 0; + // This is for adding #rounds tags to the objects. + // Each rank will add #rounds tags to #my_obj objects. + // With the selectivity, we should be able to control how many objects will be attached with the #round + // tags. So that, each of these #round tags can roughly the same selectivity. 
+ int my_obj_after_selectivity = my_obj * selectivity / 100; + for (i = 0; i < my_obj_after_selectivity; i++) { + for (iter = 0; iter < round; iter++) { + char *attr_name = (char *)calloc(64, sizeof(char)); + // snprintf(attr_name, 63, "%03d%03dattr_name%03d%03d", iter, iter, iter, iter); + snprintf(attr_name, 63, "attr_name"); + kvtag.name = strdup(attr_name); + kvtag.value = malloc(sizeof(int64_t)); + if (kvtag.value == NULL) { + printf("fail to allocate tag_value\n"); + goto done; + } + int64_t iter_val = iter; + memcpy(kvtag.value, &iter_val, sizeof(int64_t)); + kvtag.type = PDC_INT64; + kvtag.size = get_size_by_class_n_type(kvtag.value, 1, PDC_CLS_ITEM, PDC_INT64); + pdcid_t meta_id = PDC_obj_get_info(obj_ids[i])->obj_info_pub->meta_id; + if (is_using_dart) { + if (PDCobj_put_tag(obj_ids[i], kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { + printf("fail to add a kvtag to o%d\n", i + my_obj_s); + } + // NOTE: object ID is a local ID, we need to get the global object metadata ID + if (PDC_Client_insert_obj_ref_into_dart(hash_algo, kvtag.name, kvtag.value, kvtag.size, + kvtag.type, ref_type, meta_id) < 0) { + printf("fail to add a kvtag to o%d\n", i + my_obj_s); + } + total_insert++; + } + else { + if (PDCobj_put_tag(obj_ids[i], kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { + printf("fail to add a kvtag to o%d\n", i + my_obj_s); + } + } + // printf("Rank %d: Added kvtag \"%s\": %" PRId64 " -> %" PRIu64 "\n", my_rank, kvtag.name, + // *((int64_t *)kvtag.value), meta_id); + free(kvtag.name); + free(kvtag.value); + // TODO: this is for checking the correctness of the query results. + my_cnt_round[iter]++; + } + // TODO: why n_obj has to be larger than 1000? 
+ if (my_rank == 0 /*&& n_obj > 1000 */) { + println("Rank %d: Added %d kvtag to the %d / %d th object, I'm applying selectivity %d to %d " + "objects.\n", + my_rank, round, i + 1, my_obj_after_selectivity, selectivity, my_obj); + } + } + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; +#endif + + if (my_rank == 0) { + println("[TAG Creation] Rank %d: Added %d kvtag to %d objects, time: %.5f ms, dart_insert_count=%d", + my_rank, round, my_obj, total_time * 1000.0, get_dart_insert_count()); + } + +#ifdef ENABLE_MPI + // TODO: This is for checking the correctness of the query results. + for (i = 0; i < round; i++) + MPI_Allreduce(&my_cnt_round[i], &total_cnt_round[i], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + + MPI_Barrier(MPI_COMM_WORLD); +#endif + + if (bypass_query) { + if (my_rank == 0) { + println("Rank %d: All queries are bypassed.", my_rank); + report_avg_server_profiling_rst(); + } + goto done; + } + + // For the queries, we issue #round queries. + // The selectivity of each exact query should be #selectivity / 100 * #n_obj. + // Namely, if you have 1M objects, selectivity is 10, then each query should return 100K objects. 
+ int iter_round = round; + if (comm_type == 0 && is_using_dart == 0) { + iter_round = 2; + } + + for (comm_type = 1; comm_type >= 0; comm_type--) { + for (query_type = 4; query_type < 6; query_type++) { + perr_t ret_value; + int round_total = 0; + for (iter = -1; iter < iter_round; iter++) { // -1 is for warm up +#ifdef ENABLE_MPI + if (iter == 0) { + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); + } +#endif + char * attr_name = (char *)calloc(64, sizeof(char)); + int64_t *tag_value; + // snprintf(attr_name, 63, "%03d%03dattr_name%03d%03d", iter, iter, iter, iter); + snprintf(attr_name, 63, "attr_name"); + tag_value = malloc(sizeof(int64_t)); + tag_value[0] = (int64_t)iter; + + kvtag.name = strdup(attr_name); + kvtag.value = tag_value; + kvtag.type = PDC_INT64; + kvtag.size = get_size_by_class_n_type(tag_value, 1, PDC_CLS_ITEM, PDC_INT64); + + query_gen_input_t input; + query_gen_output_t output; + input.base_tag = &kvtag; + input.key_query_type = 0; + input.range_lo = iter; + input.range_hi = iter + 5; + input.value_query_type = query_type; + input.affix_len = 12; + + gen_query_key_value(&input, &output); + + pdc_ids = NULL; + if (is_using_dart) { + char *query_string = gen_query_str(&output); +#ifdef ENABLE_MPI + ret_value = (comm_type == 0) + ? PDC_Client_search_obj_ref_through_dart(hash_algo, query_string, + ref_type, &nres, &pdc_ids) + : PDC_Client_search_obj_ref_through_dart_mpi( + hash_algo, query_string, ref_type, &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = PDC_Client_search_obj_ref_through_dart(hash_algo, query_string, ref_type, + &nres, &pdc_ids); +#endif + } + else { + kvtag.name = output.key_query; + kvtag.value = output.value_query; + /* fprintf(stderr, " Rank %d: key [%s] value [%s]\n", my_rank, kvtag.name, + * kvtag.value); */ + +#ifdef ENABLE_MPI + ret_value = (comm_type == 0) + ? 
PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids) + : PDC_Client_query_kvtag_mpi(&kvtag, &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids); +#endif + } + if (ret_value < 0) { + printf("fail to query kvtag [%s] with rank %d\n", kvtag.name, my_rank); + break; + } + + // TODO: This is for checking the correctness of the query results. + if (iter >= 0) { + if (nres != total_cnt_round[iter]) + printf( + "Rank %d: query %d, comm %d, round %d - results %d do not match expected %d\n ", + my_rank, query_type, comm_type, iter, nres, total_cnt_round[iter]); + } + + round_total += nres; + free(kvtag.name); + free(kvtag.value); + } + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + // MPI_Reduce(&round_total, &ntotal, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; + + if (my_rank == 0) { + char *query_type_str = "EXACT"; + if (query_type == 4) + query_type_str = "EXACT"; + else if (query_type == 5) + query_type_str = "RANGE"; + println("[%s Client %s Query with%sINDEX] %d rounds with %d results, time: %.5f ms", + comm_type == 0 ? "Single" : "Multi", query_type_str, + is_using_dart == 0 ? 
" NO " : " DART ", round, round_total, total_time * 1000.0); + } +#endif + } // end query type + } // end comm type + + if (my_rank == 0) { + println("Rank %d: All queries are done.", my_rank); + report_avg_server_profiling_rst(); + } + + // delete all tags + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#endif + + my_obj_after_selectivity = my_obj * selectivity / 100; + for (i = 0; i < my_obj_after_selectivity; i++) { + for (iter = 0; iter < round; iter++) { + char attr_name[64]; + char tag_value[64]; + // snprintf(attr_name, 63, "%03d%03dattr_name%03d%03d", iter, iter, iter, iter); + snprintf(attr_name, 63, "attr_name"); + kvtag.name = strdup(attr_name); + kvtag.value = malloc(sizeof(int64_t)); + if (kvtag.value == NULL) { + printf("fail to allocate tag_value\n"); + goto done; + } + int64_t iter_val = iter; + memcpy(kvtag.value, &iter_val, sizeof(int64_t)); + kvtag.type = PDC_INT64; + kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + if (is_using_dart) { + pdcid_t meta_id = PDC_obj_get_info(obj_ids[i])->obj_info_pub->meta_id; + PDC_Client_delete_obj_ref_from_dart(hash_algo, kvtag.name, (char *)kvtag.value, kvtag.size, + kvtag.type, ref_type, meta_id); + } + else { + PDCobj_del_tag(obj_ids[i], kvtag.name); + } + free(kvtag.name); + free(kvtag.value); + } + } + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; +#endif + if (my_rank == 0) { + println("[TAG Deletion] Rank %d: Deleted %d kvtag from %d objects, time: %.5f ms", my_rank, round, + my_obj, total_time * 1000.0); + } + +done: + // close a container + if (PDCcont_close(cont) < 0) { + if (my_rank == 0) { + printf("fail to close container c1\n"); + } + } + else { + if (my_rank == 0) + printf("successfully close container c1\n"); + } + + // close an object property + if (PDCprop_close(obj_prop) < 0) { + if (my_rank == 0) + printf("Fail to close property @ line %d\n", __LINE__); + } + else { + if (my_rank == 0) + printf("successfully close 
object property\n"); + } + + // close a container property + if (PDCprop_close(cont_prop) < 0) { + if (my_rank == 0) + printf("Fail to close property @ line %d\n", __LINE__); + } + else { + if (my_rank == 0) + printf("successfully close container property\n"); + } + + // close pdc + if (PDCclose(pdc) < 0) { + if (my_rank == 0) + printf("fail to close PDC\n"); + } + +#ifdef ENABLE_MPI + MPI_Finalize(); +#endif + + return 0; +} diff --git a/src/tests/llsm_idioms_bench.c b/src/tests/llsm_idioms_bench.c new file mode 100644 index 000000000..ab975134d --- /dev/null +++ b/src/tests/llsm_idioms_bench.c @@ -0,0 +1,621 @@ +#include +#include +#include +#include +#include +#include +#include "pdc.h" +#include "pdc_client_connect.h" +#include "string_utils.h" + +#define MAX_LINE_LENGTH 1024 +#define MAX_COLUMNS 8 +#define MAX_ROWS_TO_READ 1000 +#define DUPLICATE_ROWS 1000 + +int +assign_work_to_rank(int rank, int size, int nwork, int *my_count, int *my_start) +{ + if (rank > size || my_count == NULL || my_start == NULL) { + printf("assign_work_to_rank(): Error with input!\n"); + return -1; + } + if (nwork < size) { + if (rank < nwork) + *my_count = 1; + else + *my_count = 0; + (*my_start) = rank * (*my_count); + } + else { + (*my_count) = nwork / size; + (*my_start) = rank * (*my_count); + + // Last few ranks may have extra work + if (rank >= size - nwork % size) { + (*my_count)++; + (*my_start) += (rank - (size - nwork % size)); + } + } + + return 1; +} + +void +print_usage(char *name) +{ + printf("%s n_obj n_round n_selectivity is_using_dart query_type comm_type\n", name); + printf("Summary: This test will create n_obj objects, and add n_selectivity tags to each object. 
Then it " + "will " + "perform n_round collective queries against the tags, each query from each client should get " + "a whole result set.\n"); + printf("Parameters:\n"); + printf(" n_obj: number of objects\n"); + printf(" n_round: number of rounds, it can be the total number of tags too, as each round will perform " + "one query against one tag\n"); + printf(" n_selectivity: selectivity, on a 100 scale. \n"); + printf(" is_using_dart: 1 for using dart, 0 for not using dart\n"); + printf(" query_type: -1 for no query, 0 for exact, 1 for prefix, 2 for suffix, 3 for infix\n"); + printf(" comm_type: 0 for point-to-point, 1 for collective\n"); +} + +perr_t +prepare_container(pdcid_t *pdc, pdcid_t *cont_prop, pdcid_t *cont, pdcid_t *obj_prop, int my_rank) +{ + perr_t ret_value = FAIL; + // create a pdc + *pdc = PDCinit("pdc"); + + // create a container property + *cont_prop = PDCprop_create(PDC_CONT_CREATE, *pdc); + if (*cont_prop <= 0) { + printf("[Client %d] Fail to create container property @ line %d!\n", my_rank, __LINE__); + goto done; + } + // create a container + *cont = PDCcont_create("c1", *cont_prop); + if (*cont <= 0) { + printf("[Client %d] Fail to create container @ line %d!\n", my_rank, __LINE__); + goto done; + } + + // create an object property + *obj_prop = PDCprop_create(PDC_OBJ_CREATE, *pdc); + if (*obj_prop <= 0) { + printf("[Client %d] Fail to create object property @ line %d!\n", my_rank, __LINE__); + goto done; + } + + ret_value = SUCCEED; +done: + return ret_value; +} + +size_t +create_objects(pdcid_t **obj_ids, int my_csv_rows, int csv_expand_factor, pdcid_t cont, pdcid_t obj_prop, + int my_rank) +{ + size_t obj_created = 0; + char obj_name[128]; + int64_t timestamp = get_timestamp_us(); + + *obj_ids = (pdcid_t *)calloc(my_csv_rows * csv_expand_factor, sizeof(pdcid_t)); + for (int i = 0; i < my_csv_rows; i++) { + // create `csv_expansion_factor` data objects for each csv row. 
+ for (int obj_idx = 0; obj_idx < csv_expand_factor; obj_idx++) { + sprintf(obj_name, "obj%" PRId64 "%d", timestamp, obj_created); + (*obj_ids)[obj_created] = PDCobj_create(cont, obj_name, obj_prop); + + if ((*obj_ids)[obj_created] <= 0) { + printf("[Client %d] Fail to create object @ line %d!\n", my_rank, __LINE__); + goto done; + } + + obj_created++; + } + } +done: + return obj_created; +} + +// Function to split a line into tokens based on the delimiter +int +split_line(char *line, char delimiter, char **tokens, int max_tokens) +{ + int count = 0; + char *token; + + token = strtok(line, &delimiter); + while (token != NULL && count < max_tokens) { + strncpy(tokens[count], token, strlen(token) + 1); + count++; + token = strtok(NULL, &delimiter); + } + + return count; +} + +int +read_lines_to_buffer(const char *filename, char **buffer, int num_lines, size_t *buffer_size) +{ + FILE *file = fopen(filename, "r"); + if (file == NULL) { + perror("Error opening file"); + return -1; + } + + const size_t max_line_length = 1024; + char line[max_line_length]; + size_t total_size = 0; + + // Calculate the total size required + for (int i = 0; i < num_lines && fgets(line, max_line_length, file) != NULL; i++) { + total_size += strlen(line); + } + + // Allocate the buffer + *buffer = (char *)calloc(total_size, sizeof(char) + 1); + if (*buffer == NULL) { + perror("Failed to allocate buffer"); + fclose(file); + return -1; + } + + // Reset file pointer to the beginning of the file + fseek(file, 0, SEEK_SET); + + // Read lines into the buffer + char *buf_ptr = *buffer; + for (int i = 0; i < num_lines && fgets(buf_ptr, max_line_length, file) != NULL; i++) { + size_t len = strlen(buf_ptr); + buf_ptr += len; // Move pointer to the end of the read line + } + + *buffer_size = total_size; + fclose(file); + + return 0; +} + +typedef int (*process_tag_of_one_object)(pid_t obj_id, char *attr_name, char *attr_value, int is_using_dart); + +int +add_tag_to_one_object(pid_t obj_id, char 
*attr_name, char *attr_value, int is_using_dart) +{ + pdc_kvtag_t kvtag; + kvtag.name = strdup(attr_name); + kvtag.value = (void *)strdup(attr_value); + kvtag.type = PDC_STRING; + kvtag.size = (strlen(kvtag.value) + 1) * sizeof(char); + + if (PDCobj_put_tag(obj_id, kvtag.name, kvtag.value, kvtag.type, kvtag.size) < 0) { + printf("Fail to add tag to object %" PRIu64 "\n", obj_id); + return -1; + } + if (is_using_dart) { + PDC_Client_insert_obj_ref_into_dart(DART_HASH, kvtag.name, (char *)kvtag.value, kvtag.size, + kvtag.type, REF_PRIMARY_ID, (uint64_t)obj_id); + } + return 0; +} + +int +delete_tag_from_one_object(pid_t obj_id, char *attr_name, char *attr_value, int is_using_dart) +{ + pdc_kvtag_t kvtag; + kvtag.name = strdup(attr_name); + kvtag.value = (void *)strdup(attr_value); + kvtag.type = PDC_STRING; + kvtag.size = (strlen(kvtag.value) + 1) * sizeof(char); + + PDCobj_del_tag(obj_id, kvtag.name); + if (is_using_dart) { + PDC_Client_delete_obj_ref_from_dart(DART_HASH, kvtag.name, (char *)kvtag.value, kvtag.size, + kvtag.type, REF_PRIMARY_ID, (uint64_t)obj_id); + } + return 1; +} + +int +csv_tags_on_objects(pdcid_t *obj_ids, char ***csv_data, char **csv_header, int num_columns, int my_csv_rows, + int csv_expand_factor, int is_using_dart, process_tag_of_one_object tag_processor) +{ + + pdc_kvtag_t kvtag; + size_t tags_processed = 0; + size_t obj_idx = 0; + for (int i = 0; i < my_csv_rows; i++) { + // take one row from csv_data. 
+ char **row = csv_data[i]; + + for (int j = 0; j < csv_expand_factor; j++) { + + char new_iter_value[30]; + sprintf(new_iter_value, "Scan_Iter_%04d", j); + char new_iter_tok[10]; + sprintf(new_iter_tok, "%04dt.tif", j); + + char extra_attr_name[100]; + char extra_attr_value[200]; + + for (int col_idx = 0; col_idx < num_columns; col_idx++) { + char *attr_name = strdup(csv_header[col_idx]); + char *attr_value = strdup(row[col_idx]); + if (strstr(attr_value, "Scan_Iter_0000")) { + char *start = strstr(attr_value, "Scan_Iter_0000"); + strncpy(start, new_iter_value, strlen(new_iter_value)); + char *zerot = strstr(attr_value, "0000t.tif"); + strncpy(zerot, new_iter_tok, strlen(new_iter_tok)); + + if (startsWith(attr_name, "Filepath")) { + sprintf(extra_attr_value, "%s", attr_value); + // remove the last 4 characters, namely '.tif'. + extra_attr_value[strlen(extra_attr_value) - 4] = '\0'; + } + else if (startsWith(attr_name, "Filename")) { + sprintf(extra_attr_name, "%s", attr_value); + // remove the last 4 characters, namely '.tif'. 
+ extra_attr_name[strlen(extra_attr_name) - 4] = '\0'; + } + } + tag_processor(obj_ids[obj_idx], attr_name, attr_value, is_using_dart); + tags_processed++; + } + // add one extra tag + tag_processor(obj_ids[obj_idx], extra_attr_name, extra_attr_value, is_using_dart); + tags_processed++; + obj_idx++; + } + } + return tags_processed; +} + +int +read_csv_from_buffer(char *data, char ***csv_header, char ****csv_data, int *num_columns, int rows_to_read, + int my_rank, int proc_num) +{ + int my_csv_row_num = 0; + // Allocate memory for the header and data + *csv_header = (char **)calloc(MAX_COLUMNS, sizeof(char *)); + *csv_data = (char ***)calloc(rows_to_read / proc_num + 1, sizeof(char **)); + for (int i = 0; i < rows_to_read / proc_num + 1; i++) { + (*csv_data)[i] = (char **)calloc(MAX_COLUMNS, sizeof(char *)); + } + + // Read the header line + char *line = strtok(data, "\n"); + if (line == NULL) { + fprintf(stderr, "Error reading headers from CSV\n"); + return -1; + } + + // Parse the header line + char *header[MAX_COLUMNS]; + *num_columns = split_line(line, ',', *csv_header, MAX_COLUMNS); + + // Read and parse the data lines + int data_line_count = 0; + while (data_line_count < rows_to_read) { + line = strtok(NULL, "\n"); + if (line == NULL) { + fprintf(stderr, "Error reading data from CSV\n"); + // free(buffer); + return -1; + } + if (data_line_count % proc_num == my_rank) { + + // Parse the data line + char *tmp_data[MAX_COLUMNS]; + *num_columns = split_line(line, ',', tmp_data, MAX_COLUMNS); + + // Copy the data into the csv_data array + for (int i = 0; i < *num_columns; i++) { + (*csv_data)[my_csv_row_num][i] = strdup(tmp_data[i]); + } + my_csv_row_num++; + } + + data_line_count++; + } + + return my_csv_row_num; +} + +int +perform_search(int is_using_dart, int query_type, int comm_type, int iter_round) +{ + perr_t ret_value; + int nres = 0; + uint64_t *pdc_ids = NULL; + // perform search + char attr_name[100]; + char tag_value[200]; + snprintf(attr_name, 100, + 
"Scan_Iter_%04d_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0%3d511977msecAbs_000x_" + "000y_%03dz_%04dt", + iter_round, iter_round, iter_round, iter_round); + snprintf(tag_value, 200, + "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//" + "Scan_Iter_%04d_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0%3d511977msecAbs_000x_000y_%03dz_" + "%04dt", + iter_round, iter_round, iter_round, iter_round); + + pdc_kvtag_t kvtag; + kvtag.name = strdup(attr_name); + kvtag.value = (void *)strdup(tag_value); + kvtag.type = PDC_STRING; + kvtag.size = (strlen(tag_value) + 1) * sizeof(char); + + query_gen_input_t input; + query_gen_output_t output; + input.base_tag = &kvtag; + input.key_query_type = query_type; + input.value_query_type = query_type; + input.affix_len = 14; + + gen_query_key_value(&input, &output); + + if (is_using_dart) { + char *query_string = gen_query_str(&output); +#ifdef ENABLE_MPI + ret_value = (comm_type == 0) + ? PDC_Client_search_obj_ref_through_dart(DART_HASH, query_string, REF_PRIMARY_ID, + &nres, &pdc_ids) + : PDC_Client_search_obj_ref_through_dart_mpi(DART_HASH, query_string, REF_PRIMARY_ID, + &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = + PDC_Client_search_obj_ref_through_dart(DART_HASH, query_string, REF_PRIMARY_ID, &nres, &pdc_ids); +#endif + } + else { + kvtag.name = output.key_query; + kvtag.value = output.value_query; + /* fprintf(stderr, " Rank %d: key [%s] value [%s]\n", my_rank, kvtag.name, + * kvtag.value); */ + +#ifdef ENABLE_MPI + ret_value = (comm_type == 0) ? 
PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids) + : PDC_Client_query_kvtag_mpi(&kvtag, &nres, &pdc_ids, MPI_COMM_WORLD); +#else + ret_value = PDC_Client_query_kvtag(&kvtag, &nres, &pdc_ids); +#endif + } + + return nres; +} + +int +main(int argc, char *argv[]) +{ + pdcid_t pdc, cont_prop, cont, obj_prop; + pdcid_t * obj_ids; + int n_obj, my_csv_rows, num_columns; + int proc_num, my_rank, i, v, iter, round, csv_expand_factor, is_using_dart, query_type, comm_type; + double stime, total_time; + pdc_kvtag_t kvtag; + uint64_t * pdc_ids; + int nres, ntotal; + int * my_cnt_round; + int * total_cnt_round; + +#ifdef ENABLE_MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &proc_num); + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); +#endif + + if (argc < 8) { + if (my_rank == 0) + print_usage(argv[0]); + goto done; + } + n_obj = atoi(argv[1]); + round = atoi(argv[2]); + csv_expand_factor = atoi(argv[3]); + is_using_dart = atoi(argv[4]); // 0 for no index, 1 for using dart. + query_type = atoi(argv[5]); // 0 for exact, 1 for prefix, 2 for suffix, 3 for infix, + // 4 for num_exact, 5 for num_range + comm_type = atoi(argv[6]); // 0 for point-to-point, 1 for collective + char *file_name = argv[7]; + + int rows_to_read = n_obj / csv_expand_factor; // read 1 more row which is the header + + int bypass_query = query_type == -1 ? 
1 : 0; + // prepare container + if (prepare_container(&pdc, &cont_prop, &cont, &obj_prop, my_rank) < 0) { + println("fail to prepare container @ line %d", __LINE__); + goto done; + } + + // ********************** Read and Broadcast first few rows of CSV file ********************** + char * data = NULL; + size_t data_size = 0; + + if (my_rank == 0) { + if (read_lines_to_buffer(file_name, &data, rows_to_read + 1, &data_size) != 0) { + fprintf(stderr, "Failed to read lines from the file\n"); +#ifdef ENABLE_MPI + MPI_Abort(MPI_COMM_WORLD, 1); +#endif + } + } +#ifdef ENABLE_MPI + // Broadcast the buffer size first + MPI_Bcast(&data_size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); + + // Allocate memory for other ranks + if (my_rank != 0) { + data = (char *)malloc(data_size * sizeof(char)); + if (data == NULL) { + fprintf(stderr, "Failed to allocate buffer\n"); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + // Broadcast the data + MPI_Bcast(data, data_size, MPI_CHAR, 0, MPI_COMM_WORLD); +#endif + + // ********************** Parse these rows of CSV file ********************** + // read the CSV file and parse into data + char ** csv_header = (char **)calloc(MAX_COLUMNS, sizeof(char *)); + char ***csv_data = NULL; + my_csv_rows = + read_csv_from_buffer(data, &csv_header, &csv_data, &num_columns, rows_to_read, my_rank, proc_num); + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#endif + + size_t obj_created = create_objects(&obj_ids, my_csv_rows, csv_expand_factor, cont, obj_prop, my_rank); + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; +#endif + + if (my_rank == 0) { + println("[Object Creation] Rank %d/%d: Created %d objects, time: %.5f ms", my_rank, proc_num, + obj_created, total_time * 1000.0); + } + + // ********************** Add tags to objects ********************** + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#endif + + size_t tags_added = csv_tags_on_objects(obj_ids, 
csv_data, csv_header, num_columns, my_csv_rows, + csv_expand_factor, is_using_dart, add_tag_to_one_object); + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; +#endif + + if (my_rank == 0) { + println("[Tag Creation] Rank %d/%d: Added %d tags for %d objects, time: %.5f ms", my_rank, proc_num, + tags_added, obj_created, total_time * 1000.0); + } + + if (bypass_query) { + if (my_rank == 0) { + println("Rank %d: All queries are bypassed.", my_rank); + report_avg_server_profiling_rst(); + } + goto done; + } + + // ********************** Perform queries ********************** + int iter_round = round; + if (comm_type == 0 && is_using_dart == 0) { + iter_round = 2; + } + + for (comm_type = 1; comm_type >= 0; comm_type--) { + for (query_type = 0; query_type < 4; query_type++) { + perr_t ret_value; + int round_total = 0; + for (iter = -1; iter < iter_round; iter++) { // -1 is for warm up +#ifdef ENABLE_MPI + if (iter == 0) { + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); + } +#endif + nres = perform_search(is_using_dart, query_type, comm_type, iter_round); + round_total += nres; + } + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + // MPI_Reduce(&round_total, &ntotal, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; + + if (my_rank == 0) { + char *query_type_str = "EXACT"; + if (query_type == 1) + query_type_str = "PREFIX"; + else if (query_type == 2) + query_type_str = "SUFFIX"; + else if (query_type == 3) + query_type_str = "INFIX"; + println("[%s Client %s Query with%sINDEX] %d rounds (%d) within %.5f ms", + comm_type == 0 ? "Single" : "Multi", query_type_str, + is_using_dart == 0 ? 
" NO " : " DART ", round, round_total, total_time * 1000.0); + } +#endif + } // end query type + } // end comm type + + if (my_rank == 0) { + println("Rank %d: All queries are done.", my_rank); + report_avg_server_profiling_rst(); + } + + // delete all tags + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#endif + + size_t tags_deleted = csv_tags_on_objects(obj_ids, csv_data, csv_header, num_columns, my_csv_rows, + csv_expand_factor, is_using_dart, delete_tag_from_one_object); + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + total_time = MPI_Wtime() - stime; +#endif + + if (my_rank == 0) { + println("[TAG Deletion] Rank %d/%d: Deleted %d kvtag from %d objects, time: %.5f ms", my_rank, + proc_num, tags_deleted, obj_created, total_time * 1000.0); + } + +done: + // close a container + if (PDCcont_close(cont) < 0) { + if (my_rank == 0) { + printf("fail to close container c1\n"); + } + } + else { + if (my_rank == 0) + printf("successfully close container c1\n"); + } + + // close an object property + if (PDCprop_close(obj_prop) < 0) { + if (my_rank == 0) + printf("Fail to close property @ line %d\n", __LINE__); + } + else { + if (my_rank == 0) + printf("successfully close object property\n"); + } + + // close a container property + if (PDCprop_close(cont_prop) < 0) { + if (my_rank == 0) + printf("Fail to close property @ line %d\n", __LINE__); + } + else { + if (my_rank == 0) + printf("successfully close container property\n"); + } + + // close pdc + if (PDCclose(pdc) < 0) { + if (my_rank == 0) + printf("fail to close PDC\n"); + } + +#ifdef ENABLE_MPI + MPI_Finalize(); +#endif + + return 0; +} \ No newline at end of file diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 0e9574b01..c8eb78f7d 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -1,3 +1,13 @@ +option(JMD_DEBUG_MODE "Enable debug mode." 
OFF) +if(JMD_DEBUG_MODE) + add_definitions(-DJMD_DEBUG=1) +endif() + +option(JMD_VERBOSE "Enable verbose mode." OFF) +if(JMD_VERBOSE) + add_definitions(-DJMD_VERBOSE=1) +endif() + set(TOOLS_EXT_INCLUDE "") set(TOOLS_EXT_LIB "") @@ -96,3 +106,16 @@ foreach(program ${PROGRAMS}) add_executable(${program} ${program}.c) target_link_libraries(${program} pdc cjson ${TOOLS_EXT_LIB}) endforeach(program) + + +add_executable(metadata_json_loader + metadata_json_loader.c + meta_json/metadata_json_processor.h + meta_json/metadata_json_printer.c + meta_json/metadata_json_printer.h + meta_json/metadata_json_importer.c + meta_json/metadata_json_importer.h + fs/fs_ops.c + fs/fs_ops.h) +target_link_libraries(metadata_json_loader ${PDC_EXT_LIB_DEPENDENCIES} pdc cjson ${TOOLS_EXT_LIB}) +target_include_directories(metadata_json_loader PUBLIC ${PDC_EXT_INCLUDE_DEPENDENCIES}) diff --git a/src/tools/fs/fs_ops.c b/src/tools/fs/fs_ops.c new file mode 100644 index 000000000..24dd2c1a8 --- /dev/null +++ b/src/tools/fs/fs_ops.c @@ -0,0 +1,157 @@ +#include "fs_ops.h" + +int desc_cmp(int v, int end){ + return v >= end; +} + +int asc_cmp(int v, int end) { + return v <= end; +} + +int incr(int a){ + return a+1; +} + +int decr(int a){ + return a-1; +} + +void collect_dir(const char *dir_path, int (*filter) (const struct dirent *), + int (*cmp) (const struct dirent **, const struct dirent **), + sorting_direction_t sd, const int topk, + int (*on_file)(struct dirent *f_entry, const char *parent_path, void *args), + int (*on_dir)(struct dirent *d_entry, const char *parent_path, void *args), + void *coll_args, + int (*pre_op)(void *coll_args), + int (*post_op)(void *coll_args)){ + + if (dir_path == NULL) { // if the given start_dir is not a valid struct. 
+ return; + } + + struct dirent **namelist; + int n; + n = scandir(dir_path, &namelist, filter, cmp); + if (n < 0) { + perror("error occurred at scandir"); + } else { + int v, end; + int (*cmp_nl)(int, int); + int (*v_act)(int); + int count = 0; + int pre_op_rst = 0, post_op_rst = 0; + if (sd == DESC) { + v = n - 1; + end = 0; + cmp_nl = desc_cmp; + v_act = decr; + } else { + v = 0; + end = n - 1; + cmp_nl = asc_cmp; + v_act = incr; + } + while (cmp_nl(v, end) && (topk > 0 ? count < topk : 1)) { + struct dirent *entry = namelist[v]; + char *path = (char *)calloc(1024, sizeof(char)); + char *name = (char *)calloc(1024, sizeof(char)); + snprintf(name,1023, "%s", entry->d_name); + snprintf(path, 1023, "%s/%s", dir_path, entry->d_name); + if (pre_op) { + pre_op_rst = pre_op(coll_args); + } + if (entry->d_type == DT_DIR) { + if (on_dir) { + on_dir(entry, dir_path, coll_args); + } + collect_dir(path, filter, cmp, sd, topk, on_file, on_dir, coll_args, pre_op, post_op); + } else { + if (on_file) { + on_file(entry, dir_path, coll_args); + } + } + if (post_op) { + post_op_rst = post_op(coll_args); + } + free(path); + free(name); + free(namelist[v]); + v = v_act(v); + count++; + } + free(namelist); + } +} + +int is_regular_file(const char *path){ + struct stat path_stat; + stat(path, &path_stat); + return S_ISREG(path_stat.st_mode); +} + +size_t get_file_size(const char *filename) { + struct stat st; + if(stat(filename, &st) != 0) { + return 0; + } + return st.st_size; +} + + +int dir_exists(char *dirname){ + DIR* dir = opendir(dirname); + if (dir) + { + /* Directory exists. */ + closedir(dir); + return 1; + } + else if (ENOENT == errno) + { + /* Directory does not exist. */ + return 0; + } + else + { + /* opendir() failed for some other reason. 
*/ + return 0; + } +} + +/* Function with behaviour like `mkdir -p' */ +int +mkpath(const char *s, mode_t mode){ + char *q, *r = NULL, *path = NULL, *up = NULL; + int rv; + + rv = -1; + if (strcmp(s, ".") == 0 || strcmp(s, "/") == 0) + return (0); + + if ((path = strdup(s)) == NULL) + exit(1); + + if ((q = strdup(s)) == NULL) + exit(1); + + if ((r = dirname(q)) == NULL) + goto out; + + if ((up = strdup(r)) == NULL) + exit(1); + + if ((mkpath(up, mode) == -1) && (errno != EEXIST)) + goto out; + + if ((mkdir(path, mode) == -1) && (errno != EEXIST)) + rv = -1; + else + rv = 0; + +out: + if (up != NULL) + free(up); + free(q); + free(path); + return (rv); +} diff --git a/src/tools/fs/fs_ops.h b/src/tools/fs/fs_ops.h new file mode 100644 index 000000000..7d0e93cd6 --- /dev/null +++ b/src/tools/fs/fs_ops.h @@ -0,0 +1,34 @@ + +/* File foo. */ +#ifndef PDC_TOOLS_FS_OPS +#define PDC_TOOLS_FS_OPS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef enum { ASC, DESC } sorting_direction_t; + +void collect_dir(const char *dir_path, int (*selector)(const struct dirent *), + int (*cmp)(const struct dirent **, const struct dirent **), sorting_direction_t sd, int topk, + int (*on_file)(struct dirent *f_entry, const char *parent_path, void *args), + int (*on_dir)(struct dirent *d_entry, const char *parent_path, void *args), void *coll_args, + int (*pre_op)(void *coll_args), int (*post_op)(void *coll_args)); + +int is_regular_file(const char *path); + +size_t get_file_size(const char *filename); + +int dir_exists(char *dirname); + +/* Function with behaviour like `mkdir -p' */ +int mkpath(const char *s, mode_t mode); + +#endif /* !PDC_TOOLS_FS_OPS */ \ No newline at end of file diff --git a/src/tools/meta_json/Readme.md b/src/tools/meta_json/Readme.md new file mode 100644 index 000000000..8a2dc4b7d --- /dev/null +++ b/src/tools/meta_json/Readme.md @@ -0,0 +1,3 @@ +# This is a directory for the meta_json tool. 
+ +The meta_json tool is a command line tool that can be used to generate a JSON file that contains metadata about the source code in a directory. The metadata includes information about the files in the directory, the functions in the files, and the classes in the files. \ No newline at end of file diff --git a/src/tools/meta_json/llsm_converter/__init__.py b/src/tools/meta_json/llsm_converter/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/tools/meta_json/llsm_converter/llsm_csv_metadata_extractor.py b/src/tools/meta_json/llsm_converter/llsm_csv_metadata_extractor.py new file mode 100644 index 000000000..77e9be0d8 --- /dev/null +++ b/src/tools/meta_json/llsm_converter/llsm_csv_metadata_extractor.py @@ -0,0 +1,153 @@ +import pandas as pd +import os +import json +import uuid + +def extract_attributes_from_filename(filename, incr, new_obj): + attr_in_fn = filename.split('_') + # Scan_Iter + scanIter = { + "name": "Scan_Iter", + "value": int(attr_in_fn[2]) + incr, + "class" : "singleton", + "type": "int", + } + # CAM + CAM = { + "name": "Cam", + "value": attr_in_fn[3].replace('Cam', ''), + "class" : "singleton", + "type": "str", + } + # Ch + Ch = { + "name": "Ch", + "value": int(attr_in_fn[4].replace('ch', '')), + "class" : "singleton", + "type": "int", + } + # stackn + stackn = { + "name": "stackn", + "value": int(attr_in_fn[6].replace('stack', '')), + "class" : "singleton", + "type": "int", + } + # laser + laser = { + "name": "laser_nm", + "value": int(attr_in_fn[7].replace('nm', '')), + "class" : "singleton", + "type": "int", + } + # abstime + abstime = { + "name": "abstime", + "value": int(attr_in_fn[8].replace('msec', '')), + "class" : "singleton", + "type": "int", + } + # fpgatime + fpgatime = { + "name": "fpgatime", + "value": int(attr_in_fn[9].replace('msecAbs', '')), + "class" : "singleton", + "type": "int", + } + # x_str + x_str = { + "name": "x_str", + "value": int(attr_in_fn[10].replace('x', '')), + "class" : "singleton", + 
"type": "int", + } + # y_str + y_str = { + "name": "y_str", + "value": int(attr_in_fn[11].replace('y', '')), + "class" : "singleton", + "type": "int", + } + # z_str + z_str = { + "name": "z_str", + "value": int(attr_in_fn[12].replace('z', '')), + "class" : "singleton", + "type": "int", + } + # t_str + t_str = { + "name": "t_str", + "value": int(attr_in_fn[13].replace('t.tif', '')), + "class" : "singleton", + "type": "int", + } + new_obj["properties"].append(scanIter) + new_obj["properties"].append(CAM) + new_obj["properties"].append(Ch) + new_obj["properties"].append(stackn) + new_obj["properties"].append(laser) + new_obj["properties"].append(abstime) + new_obj["properties"].append(fpgatime) + new_obj["properties"].append(x_str) + new_obj["properties"].append(y_str) + new_obj["properties"].append(z_str) + new_obj["properties"].append(t_str) + + +def extract_metadata(input_directory, output_directory, object_replica_number): + output_dict = { + "dataset_name": "LLSM", + "dataset_description": "LLSM dataset", + "source_URL":"", + "collector": "Wei Zhang", + "objects":[] + } + num_files = 0; + for filename in os.listdir(input_directory): + if filename.endswith('.csv'): + num_files += 1 + filepath = os.path.join(input_directory, filename) + df = pd.read_csv(filepath, delimiter=',') + for incr in range(object_replica_number): + output_dict["objects"] = [] + for index, row in df.iterrows(): + unique_id = int(str(num_files) + str(incr) + str(index)) # uuid.uuid4().hex + new_obj = { + "name": f"object_{unique_id}", + "type": "file", + "full_path": row['Filepath'], + "properties":[] + } + for column_name, value in row.items(): + # format: %s/%sScan_Iter_%s_Cam%s_ch%d_CAM1_stack%04d_%dnm_%07dmsec_%010dmsecAbs%s + # variables used in original LLSM: stitching_rt, prefix, fullIter{n}, Cam(ncam), Ch(c), stackn(s), laser, abstime, fpgatime, z_str); + # example: Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif + if column_name == 
'Filename': + extract_attributes_from_filename(value, incr, new_obj) + continue # this will avoid adding the filename as a property again, instead, we add all the attributes in the filename as properties + original_prop = { + "name": column_name, + "value": value, + "class" : "singleton", + "type": str(type(value).__name__) + } + new_obj["properties"].append(original_prop) + output_dict["objects"].append(new_obj) + json_file_path = "{}/{}_{}.json".format(output_directory, filename, incr) + with open(json_file_path, "w") as json_file: + json.dump(output_dict, json_file) + print("File {} has been written".format(json_file_path)) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="LLSM CSV Metadata Extractor") + parser.add_argument("-i", "--input_directory", required=True, type=str, help="Directory path containing CSV files") + parser.add_argument("-o", "--output_directory", required=True, type=str, help="Directory path to save the JSON files") + parser.add_argument("-n", "--num_replica", required=False, type=int, help="Number of replicas for each object") + + args = parser.parse_args() + + extract_metadata(args.input_directory, args.output_directory , args.num_replica) diff --git a/src/tools/meta_json/llsm_converter/requirement.txt b/src/tools/meta_json/llsm_converter/requirement.txt new file mode 100644 index 000000000..52aa4675c --- /dev/null +++ b/src/tools/meta_json/llsm_converter/requirement.txt @@ -0,0 +1,5 @@ +pandas +pyarrow +numpy +jsonschema +argparse \ No newline at end of file diff --git a/src/tools/meta_json/llsm_converter/validator/metadata_json_schema.json b/src/tools/meta_json/llsm_converter/validator/metadata_json_schema.json new file mode 100644 index 000000000..92aca080f --- /dev/null +++ b/src/tools/meta_json/llsm_converter/validator/metadata_json_schema.json @@ -0,0 +1,207 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "dataset_name": { + 
"type": "string" + }, + "dataset_description": { + "type": "string" + }, + "source_URL": { + "type": "string" + }, + "collector": { + "type": "string" + }, + "objects": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + }, + "full_path": { + "type": "string" + }, + "properties": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "value": { + "type": [ + "number", + "string", + "array" + ] + }, + "class": { + "type": "string", + "enum": [ + "singleton", + "array" + ] + }, + "type": { + "type": "string", + "enum": [ + "int", + "float", + "str" + ] + } + }, + "required": [ + "name", + "value", + "class", + "type" + ], + "allOf": [ + { + "if": { + "properties": { + "class": { + "const": "singleton" + }, + "type": { + "const": "int" + } + } + }, + "then": { + "properties": { + "value": { + "type": "integer" + } + } + } + }, + { + "if": { + "properties": { + "class": { + "const": "singleton" + }, + "type": { + "const": "float" + } + } + }, + "then": { + "properties": { + "value": { + "type": "number" + } + } + } + }, + { + "if": { + "properties": { + "class": { + "const": "singleton" + }, + "type": { + "const": "str" + } + } + }, + "then": { + "properties": { + "value": { + "type": "string" + } + } + } + }, + { + "if": { + "properties": { + "class": { + "const": "array" + }, + "type": { + "const": "int" + } + } + }, + "then": { + "properties": { + "value": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, + { + "if": { + "properties": { + "class": { + "const": "array" + }, + "type": { + "const": "float" + } + } + }, + "then": { + "properties": { + "value": { + "type": "array", + "items": { + "type": "number" + } + } + } + } + }, + { + "if": { + "properties": { + "class": { + "const": "array" + }, + "type": { + "const": "str" + } + } + }, + "then": { + "properties": { + "value": { + "type": "array", + 
"items": { + "type": "string" + } + } + } + } + } + ] + } + } + }, + "required": [ + "name", + "properties" + ] + } + } + }, + "required": [ + "dataset_name", + "objects" + ] +} \ No newline at end of file diff --git a/src/tools/meta_json/llsm_converter/validator/metadata_schema_validator.py b/src/tools/meta_json/llsm_converter/validator/metadata_schema_validator.py new file mode 100644 index 000000000..6c9d69bec --- /dev/null +++ b/src/tools/meta_json/llsm_converter/validator/metadata_schema_validator.py @@ -0,0 +1,34 @@ +import json +import argparse +from jsonschema import validate +from jsonschema.exceptions import ValidationError + +def load_json(file_path): + """Load JSON data from a file.""" + with open(file_path, 'r') as file: + return json.load(file) + +def validate_json(json_data, json_schema): + """Validate JSON data against a schema.""" + try: + validate(instance=json_data, schema=json_schema) + return "valid" + except ValidationError as e: + print(e) + return "invalid" + +def main(schema_path, data_path): + """Main function to load and validate JSON data against a schema.""" + schema = load_json(schema_path) + print("Schema {} loaded".format(schema_path)) + data = load_json(data_path) + print("JSON {} loaded".format(data_path)) + result = validate_json(data, schema) + print("{} is {}. 
Object count: {}".format(data_path, result, len(data["objects"]))) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Validate JSON against a schema.") + parser.add_argument("-s", "--schema", dest="schema_path", required=True, help="Path to the JSON schema file") + parser.add_argument("-d", "--data", dest="data_path", required=True, help="Path to the JSON data file") + args = parser.parse_args() + main(args.schema_path, args.data_path) diff --git a/src/tools/meta_json/metadata_json_importer.c b/src/tools/meta_json/metadata_json_importer.c new file mode 100644 index 000000000..aeb1df400 --- /dev/null +++ b/src/tools/meta_json/metadata_json_importer.c @@ -0,0 +1,244 @@ +#include "metadata_json_importer.h" +#include "pdc.h" +#include + +typedef struct { + pdcid_t obj_prop; + pdcid_t pdc; + pdcid_t cont_prop; + pdcid_t cont; + pdcid_t obj_id; +} pdc_importer_args_t; + +/** + * @brief init_importer + * @param md_json_args + * @return 0 if success, -1 if error + */ +int +init_importer(MD_JSON_ARGS *md_json_args) +{ + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)malloc(sizeof(pdc_importer_args_t)); + + // initilize PDC related things, and store in the above structure. 
+ // create a pdc + pdc_args->pdc = PDCinit("pdc"); + + md_json_args->processor_args = pdc_args; + + return pdc_args->pdc; +} + +/** + * @brief import_json_header + * @param dataset_name + * @param dataset_description + * @param source_URL + * @param collector + * @param md_json_args -> here we should have the object ID stored in md_json_args + * @return 0 if success, -1 if error + */ +int +import_json_header(cJSON *dataset_name, cJSON *dataset_description, cJSON *source_URL, cJSON *collector, + MD_JSON_ARGS *md_json_args) +{ + // create object in PDC and store the object ID in md_json_args + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)md_json_args->processor_args; + + // create a container property + pdc_args->cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc_args->pdc); + if (pdc_args->cont_prop <= 0) + printf("Fail to create container property @ line %d!\n", __LINE__); + + // create a container + pdc_args->cont = PDCcont_create(cJSON_GetStringValue(dataset_name), pdc_args->cont_prop); + if (pdc_args->cont <= 0) + printf("Fail to create container @ line %d!\n", __LINE__); + + // extract the strings from the JSON attributes + char *ds_desc = cJSON_GetStringValue(dataset_description); + char *ds_srcURL = cJSON_GetStringValue(source_URL); + char *ds_col = cJSON_GetStringValue(collector); + // add tags to the container + PDCcont_put_tag(pdc_args->cont, "dataset_description", (void *)ds_desc, PDC_STRING, strlen(ds_desc) + 1); + PDCcont_put_tag(pdc_args->cont, "source_URL", (void *)ds_srcURL, PDC_STRING, strlen(ds_srcURL) + 1); + PDCcont_put_tag(pdc_args->cont, "collector", (void *)ds_col, PDC_STRING, strlen(ds_col) + 1); + + // create an object property + pdc_args->obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc_args->pdc); + if (pdc_args->obj_prop <= 0) { + printf("Fail to create object property @ line %d!\n", __LINE__); + return -1; + } + + return 0; +} + +/** + * @brief import_object_base + * @param name + * @param type + * @param full_path + * @param 
md_json_args -> here we should have the object ID in md_json_args already + * @return 0 if success, -1 if error + */ +int +import_object_base(cJSON *name, cJSON *type, cJSON *full_path, MD_JSON_ARGS *md_json_args) +{ + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)md_json_args->processor_args; + + if (cJSON_GetStringValue(name) == NULL) { + printf("Object name is NULL!\n"); + return -1; + } + char datetime_buff[15]; + time_t now = time(NULL); + struct tm *local_time = localtime(&now); + strftime(datetime_buff, sizeof(datetime_buff), "%Y%m%d%H%M%S", local_time); + // create object in PDC and store the object ID in md_json_args + + char *object_name = (char *)calloc(strlen(cJSON_GetStringValue(name)) + 20, sizeof(char)); + sprintf(object_name, "%s_%d%s", cJSON_GetStringValue(name), md_json_args->mpi_rank, datetime_buff); + pdc_args->obj_id = PDCobj_create(pdc_args->cont, object_name, pdc_args->obj_prop); + if (pdc_args->obj_id <= 0) { + printf("Fail to create object!\n"); + return -1; + } + + if (PDCobj_put_tag(pdc_args->obj_id, "obj_full_path", (void *)cJSON_GetStringValue(full_path), PDC_STRING, + strlen(cJSON_GetStringValue(full_path)) + 1) != SUCCEED) { + printf("Fail to put tag!\n"); + return -1; + } + if (PDCobj_put_tag(pdc_args->obj_id, "obj_type", (void *)cJSON_GetStringValue(type), PDC_STRING, + strlen(cJSON_GetStringValue(type)) + 1) != SUCCEED) { + printf("Fail to put tag!\n"); + return -1; + } + + return 0; +} + +/** + * @brief import_object_property + * @param name + * @param type + * @param cls + * @param value + * @param md_json_args -> here we should have the object ID in md_json_args already + * @return 0 if success, -1 if error + * + */ +int +import_object_property(cJSON *name, cJSON *type, cJSON *cls, cJSON *value, MD_JSON_ARGS *md_json_args) +{ + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)md_json_args->processor_args; + + // create object in PDC and store the object ID in md_json_args + + // bypass cls=array for now 
(singletone, array, etc.), just consider singleton + if (strcmp(cls->valuestring, "singleton") != 0) { + goto end; + } + pdc_var_type_t pdc_type = PDC_UNKNOWN; + void * property_value = NULL; + size_t property_value_size = 0; + if (strcmp(type->valuestring, "int") == 0) { + pdc_type = PDC_INT; + int64_t pval = (int64_t)cJSON_GetNumberValue(value); + property_value = &pval; + property_value_size = sizeof(int64_t); + } + else if (strcmp(type->valuestring, "float") == 0) { + pdc_type = PDC_FLOAT; + double pval = cJSON_GetNumberValue(value); + property_value = &pval; + property_value_size = sizeof(double); + } + else if (strcmp(type->valuestring, "str") == 0) { + pdc_type = PDC_STRING; + char *pval = cJSON_GetStringValue(value); + if (pval != NULL) { + property_value = pval; + property_value_size = strlen(pval) + 1; + } + } + else { + printf("attr %s is of unknown type %s \n", cJSON_GetStringValue(name), cJSON_GetStringValue(type)); + goto end; + } + + if (PDCobj_put_tag(pdc_args->obj_id, name->valuestring, (void *)&property_value, pdc_type, + property_value_size) != SUCCEED) { + printf("Fail to add tag!\n"); + } + +end: + return 0; +} + +/** + * @brief finish_import_one_json + * @param md_json_args + * @return 0 if success, -1 if error + */ +int +finish_import_one_json(MD_JSON_ARGS *md_json_args) +{ + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)md_json_args->processor_args; + // finalize PDC related things + // close a container + if (PDCcont_close(pdc_args->cont) < 0) { + printf("fail to close container c1\n"); + return -1; + } + + // close a container property + if (PDCprop_close(pdc_args->obj_prop) < 0) { + printf("Fail to close property @ line %d\n", __LINE__); + return -1; + } + + if (PDCprop_close(pdc_args->cont_prop) < 0) { + printf("Fail to close property @ line %d\n", __LINE__); + return -1; + } + + return 0; +} + +/** + * @brief finalize_importer + * @param md_json_args + * @return 0 if success, -1 if error + */ +int 
+finalize_importer(MD_JSON_ARGS *md_json_args) +{ + pdc_importer_args_t *pdc_args = (pdc_importer_args_t *)md_json_args->processor_args; + // finalize PDC related things + // close pdc + if (PDCclose(pdc_args->pdc) < 0) { + printf("fail to close PDC\n"); + return -1; + } + + return 0; +} + +/** + * @brief create_md_json_importer + * @return + */ +MD_JSON_PROCESSOR * +create_md_json_importer() +{ + MD_JSON_PROCESSOR *md_json_processor = (MD_JSON_PROCESSOR *)malloc(sizeof(MD_JSON_PROCESSOR)); + md_json_processor->init_processor = init_importer; + md_json_processor->process_json_header = import_json_header; + md_json_processor->process_object_base = import_object_base; + md_json_processor->process_object_property = import_object_property; + md_json_processor->complete_one_json_file = finish_import_one_json; + md_json_processor->finalize_processor = finalize_importer; + return md_json_processor; +} diff --git a/src/tools/meta_json/metadata_json_importer.h b/src/tools/meta_json/metadata_json_importer.h new file mode 100644 index 000000000..2b2ea7468 --- /dev/null +++ b/src/tools/meta_json/metadata_json_importer.h @@ -0,0 +1,72 @@ +#ifndef METADATA_JSON_IMPORTER_H +#define METADATA_JSON_IMPORTER_H +#include +#include +#include +#include "cjson/cJSON.h" +#include "metadata_json_processor.h" + +// Your code here + +/** + * @brief init_importer + * @param md_json_args + * @return 0 if success, -1 if error + */ +int init_importer(MD_JSON_ARGS *md_json_args); + +/** + * @brief import_json_header + * @param dataset_name + * @param dataset_description + * @param source_URL + * @param collector + * @param md_json_args -> here we should have the object ID stored in md_json_args + * @return 0 if success, -1 if error + */ +int import_json_header(cJSON *dataset_name, cJSON *dataset_description, cJSON *source_URL, cJSON *collector, + MD_JSON_ARGS *md_json_args); + +/** + * @brief import_object_base + * @param name + * @param type + * @param full_path + * @param md_json_args -> here 
we should have the object ID in md_json_args already + * @return 0 if success, -1 if error + */ +int import_object_base(cJSON *name, cJSON *type, cJSON *full_path, MD_JSON_ARGS *md_json_args); + +/** + * @brief import_object_property + * @param name + * @param type + * @param cls + * @param value + * @param md_json_args -> here we should have the object ID in md_json_args already + * @return 0 if success, -1 if error + * + */ +int import_object_property(cJSON *name, cJSON *type, cJSON *cls, cJSON *value, MD_JSON_ARGS *md_json_args); + +/** + * @brief finish_import_one_json + * @param md_json_args + * @return 0 if success, -1 if error + */ +int finish_import_one_json(MD_JSON_ARGS *md_json_args); + +/** + * @brief finalize_importer + * @param md_json_args + * @return 0 if success, -1 if error + */ +int finalize_importer(MD_JSON_ARGS *md_json_args); + +/** + * @brief create_md_json_importer + * @return + */ +MD_JSON_PROCESSOR *create_md_json_importer(); + +#endif // METADATA_JSON_IMPORTER_H diff --git a/src/tools/meta_json/metadata_json_printer.c b/src/tools/meta_json/metadata_json_printer.c new file mode 100644 index 000000000..bb666d2ee --- /dev/null +++ b/src/tools/meta_json/metadata_json_printer.c @@ -0,0 +1,67 @@ +#include "metadata_json_printer.h" + +int +init_printer(MD_JSON_ARGS *md_json_args) +{ + return EXIT_SUCCESS; +} + +int +print_json_header(cJSON *dataset_name, cJSON *dataset_description, cJSON *source_URL, cJSON *collector, + MD_JSON_ARGS *md_json_args) +{ + printf("Dataset Name: %s\n", dataset_name->valuestring); + printf("Dataset Description: %s\n", dataset_description->valuestring); + printf("Source URL: %s\n", source_URL->valuestring); + printf("Collector: %s\n\n", collector->valuestring); + return 0; +} + +int +print_object_base(cJSON *name, cJSON *type, cJSON *full_path, MD_JSON_ARGS *md_json_args) +{ + printf(" Object Name: %s\n", name->valuestring); + printf(" Object Type: %s\n", type->valuestring); + printf(" Object Full Path: %s\n\n", 
full_path->valuestring); + return 0; +} + +int +print_object_property(cJSON *name, cJSON *type, cJSON *cls, cJSON *value, MD_JSON_ARGS *md_json_args) +{ + printf(" Property Name: %s\n", name->valuestring); + if (cJSON_IsString(value)) { + printf(" Property Value: %s\n", value->valuestring); + } + else if (cJSON_IsNumber(value)) { + printf(" Property Value: %f\n", value->valuedouble); + } + printf(" Property Class: %s\n", cls->valuestring); + printf(" Property Type: %s\n\n", type->valuestring); + return 0; +} + +int +done_printing_one_json(MD_JSON_ARGS *md_json_args) +{ + return 0; +} + +int +finalize_printer(MD_JSON_ARGS *md_json_args) +{ + return 0; +} + +MD_JSON_PROCESSOR * +create_md_json_printer() +{ + MD_JSON_PROCESSOR *md_json_processor = (MD_JSON_PROCESSOR *)malloc(sizeof(MD_JSON_PROCESSOR)); + md_json_processor->init_processor = init_printer; + md_json_processor->process_json_header = print_json_header; + md_json_processor->process_object_base = print_object_base; + md_json_processor->process_object_property = print_object_property; + md_json_processor->complete_one_json_file = done_printing_one_json; + md_json_processor->finalize_processor = finalize_printer; + return md_json_processor; +} \ No newline at end of file diff --git a/src/tools/meta_json/metadata_json_printer.h b/src/tools/meta_json/metadata_json_printer.h new file mode 100644 index 000000000..847195109 --- /dev/null +++ b/src/tools/meta_json/metadata_json_printer.h @@ -0,0 +1,70 @@ +#ifndef METADATA_JSON_PRINTER_H +#define METADATA_JSON_PRINTER_H + +#include +#include +#include +#include "cjson/cJSON.h" +#include "metadata_json_processor.h" + +/** + * @brief init_printer + * @param md_json_args + * @return 0 if success, -1 if error + */ +int init_printer(MD_JSON_ARGS *md_json_args); + +/** + * @brief print_json_header + * @param dataset_name + * @param dataset_description + * @param source_URL + * @param collector + * @param md_json_args + * @return 0 if success, -1 if error + */ +int 
print_json_header(cJSON *dataset_name, cJSON *dataset_description, cJSON *source_URL, cJSON *collector,
+                      MD_JSON_ARGS *md_json_args);
+
+/**
+ * @brief print_object_base
+ * @param name
+ * @param type
+ * @param full_path
+ * @param md_json_args
+ * @return 0 if success, -1 if error
+ */
+int print_object_base(cJSON *name, cJSON *type, cJSON *full_path, MD_JSON_ARGS *md_json_args);
+
+/**
+ * @brief print_object_property
+ * @param name
+ * @param type
+ * @param cls
+ * @param value
+ * @param md_json_args
+ * @return 0 if success, -1 if error
+ *
+ */
+int print_object_property(cJSON *name, cJSON *type, cJSON *cls, cJSON *value, MD_JSON_ARGS *md_json_args);
+
+/**
+ * @brief done_printing_one_json
+ * @param md_json_args
+ * @return 0 if success, -1 if error
+ */
+int done_printing_one_json(MD_JSON_ARGS *md_json_args);
+
+/**
+ * @brief finalize_printer
+ * @param md_json_args
+ * @return 0 if success, -1 if error
+ */
+int finalize_printer(MD_JSON_ARGS *md_json_args);
+
+/**
+ * @brief create_md_json_printer
+ * @return a malloc()-allocated processor whose callbacks are the printer functions
+ */
+MD_JSON_PROCESSOR *create_md_json_printer();
+
+#endif // METADATA_JSON_PRINTER_H
diff --git a/src/tools/meta_json/metadata_json_processor.h b/src/tools/meta_json/metadata_json_processor.h
new file mode 100644
index 000000000..a60c4ca68
--- /dev/null
+++ b/src/tools/meta_json/metadata_json_processor.h
@@ -0,0 +1,38 @@
+#ifndef METADATA_JSON_PROCESSOR_H
+#define METADATA_JSON_PROCESSOR_H
+
+#include
+#include
+#include
+#include
+#include "cjson/cJSON.h"
+
+/**
+ * State shared between the JSON loader and the active processor. The loader
+ * passes one instance to every processor callback.
+ */
+typedef struct {
+    /* processor-private state; the importer casts it to pdc_importer_args_t */
+    void *   processor_args; // pdc_importer_args_t
+    void *   arg1;           // unused
+    void *   arg2;           // unused
+    /* number of JSON files the loader has visited so far, whether or not this
+     * rank handled them */
+    int      current_file_count;
+    /* number of JSON files this rank actually read and parsed */
+    int      processed_file_count;
+    /* MPI communicator size and this process's rank (1 and 0 without MPI); the
+     * loader handles file i only when i % mpi_size == mpi_rank */
+    int      mpi_size;
+    int      mpi_rank;
+    /* running totals the loader accumulates while parsing files */
+    uint64_t total_obj_count;
+    uint64_t total_prop_count;
+} MD_JSON_ARGS;
+
+/**
+ * Callback table of a JSON processor (printer or importer). Per the
+ * declarations in the printer/importer headers, callbacks return 0 on
+ * success and -1 on error.
+ */
+typedef struct {
+
+    /* called once by the loader's main() before any file is scanned */
+    int (*init_processor)(MD_JSON_ARGS *);
+
+    /* called once per parsed file with the dataset_name, dataset_description,
+     * source_URL and collector items (an item is NULL when its key is absent) */
+    int (*process_json_header)(cJSON *, cJSON *, cJSON *, cJSON *, MD_JSON_ARGS *);
+
+    /* called once per entry of "objects" with its name, type and full_path
+     * items; on a non-zero result the loader skips that object's properties */
+    int (*process_object_base)(cJSON *, cJSON *, cJSON *, MD_JSON_ARGS *);
+
+    int
(*process_object_property)(cJSON *, cJSON *, cJSON *, cJSON *, MD_JSON_ARGS *); + + int (*complete_one_json_file)(MD_JSON_ARGS *); + + int (*finalize_processor)(MD_JSON_ARGS *); + +} MD_JSON_PROCESSOR; + +#endif // METADATA_JSON_PROCESSOR_H \ No newline at end of file diff --git a/src/tools/meta_json/test.json b/src/tools/meta_json/test.json new file mode 100644 index 000000000..4b5705d03 --- /dev/null +++ b/src/tools/meta_json/test.json @@ -0,0 +1,365 @@ +{ + "dataset_name": "LLSM", + "dataset_description": "LLSM dataset", + "source_URL": "", + "collector": "Wei Zhang", + "objects": [ + { + "name": "object00", + "type": "file", + "full_path": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "properties": [ + { + "name": "Filepath", + "value": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "class": "singleton", + "type": "str" + }, + { + "name": "Scan Iter", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "Cam", + "value": "A", + "class": "singleton", + "type": "str" + }, + { + "name": "Ch", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "stackn", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "laser_nm", + "value": 488, + "class": "singleton", + "type": "int" + }, + { + "name": "abstime", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "fpgatime", + "value": 67511977, + "class": "singleton", + "type": "int" + }, + { + "name": "x_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "y_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "z_str", + "value": 15, + "class": "singleton", + "type": "int" + }, + { + "name": "t_str", + "value": 0, + 
"class": "singleton", + "type": "int" + }, + { + "name": "StageX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "StageY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "StageZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + } + ] + }, + { + "name": "object01", + "type": "file", + "full_path": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "properties": [ + { + "name": "Filepath", + "value": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "class": "singleton", + "type": "str" + }, + { + "name": "Scan Iter", + "value": 1, + "class": "singleton", + "type": "int" + }, + { + "name": "Cam", + "value": "A", + "class": "singleton", + "type": "str" + }, + { + "name": "Ch", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "stackn", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "laser_nm", + "value": 488, + "class": "singleton", + "type": "int" + }, + { + "name": "abstime", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "fpgatime", + "value": 67511977, + "class": "singleton", + "type": "int" + }, + { + "name": "x_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "y_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "z_str", + "value": 15, + "class": 
"singleton", + "type": "int" + }, + { + "name": "t_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "StageX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "StageY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "StageZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + } + ] + }, + { + "name": "object02", + "type": "file", + "full_path": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "properties": [] + }, + { + "name": "object03", + "type": "file", + "properties": [], + "full_path": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif" + }, + { + "name": "object04", + "type": "file", + "full_path": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "properties": [ + { + "name": "Filepath", + "value": "/clusterfs/nvme2/Data/20221128_Korra_GaoGroupVisit/Data/20221213_OB_WT/V1_600um//Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0067511977msecAbs_000x_000y_015z_0000t.tif", + "class": "singleton", + "type": "str" + }, + { + "name": "Scan Iter", + "value": 4, + "class": "singleton", + "type": "int" + }, + { + "name": "Cam", + "value": "A", + "class": "singleton", + "type": "str" + }, + { + "name": "Ch", + "value": 0, 
+ "class": "singleton", + "type": "int" + }, + { + "name": "stackn", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "laser_nm", + "value": 488, + "class": "singleton", + "type": "int" + }, + { + "name": "abstime", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "fpgatime", + "value": 67511977, + "class": "singleton", + "type": "int" + }, + { + "name": "x_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "y_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "z_str", + "value": 15, + "class": "singleton", + "type": "int" + }, + { + "name": "t_str", + "value": 0, + "class": "singleton", + "type": "int" + }, + { + "name": "StageX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "StageY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "StageZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveX_um_", + "value": -2600.054803, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveY_um_", + "value": -1553.199101, + "class": "singleton", + "type": "float" + }, + { + "name": "ObjectiveZ_um_", + "value": -7220.279901, + "class": "singleton", + "type": "float" + } + ] + } + ] +} \ No newline at end of file diff --git a/src/tools/metadata_json_loader.c b/src/tools/metadata_json_loader.c new file mode 100644 index 000000000..01f3feeee --- /dev/null +++ b/src/tools/metadata_json_loader.c @@ -0,0 +1,343 @@ +/** + * This is a JSON loader that reads the metadata from a JSON file. 
+ * + * The JSON file is expected to have the following format: + * + */ +#include +#include +#include +#include +#include +#include "cjson/cJSON.h" +#include +#include +#include "fs/fs_ops.h" +#include "string_utils.h" +#include "timer_utils.h" + +#ifdef JMD_DEBUG +#include "meta_json/metadata_json_printer.h" +#else +#include "meta_json/metadata_json_importer.h" +#endif + +#ifdef ENABLE_MPI +#include "mpi.h" +#endif + +#include "pdc.h" + +#define MAX_JSON_FILE_SIZE 1000000 + +MD_JSON_PROCESSOR *md_json_processor; + +// typedef struct { +// int current_file_count; +// int processed_file_count; +// int mpi_size; +// int mpi_rank; +// } meta_json_loader_args_t; + +static void +initilize_md_json_processor() +{ +#ifdef JMD_DEBUG + md_json_processor = create_md_json_printer(); +#else + md_json_processor = create_md_json_importer(); +#endif +} + +static char * +read_json_file(const char *filename, void *args) +{ + FILE *fp; + char *json_str; + long json_file_size; + + fp = fopen(filename, "r"); + if (fp == NULL) { + fprintf(stderr, "Error: cannot open file %s\n", filename); + exit(1); + } + + fseek(fp, 0L, SEEK_END); + json_file_size = ftell(fp); + rewind(fp); + + // if (json_file_size > MAX_JSON_FILE_SIZE) { + // fprintf(stderr, "Error: file %s is too large\n", filename); + // exit(1); + // } + + json_str = (char *)malloc(json_file_size + 1); + if (json_str == NULL) { + fprintf(stderr, "Error: cannot allocate memory for json_str\n"); + exit(1); + } + + size_t bytesRead = fread(json_str, 1, json_file_size, fp); + if (bytesRead < json_file_size) { + if (!feof(fp)) { + fprintf(stderr, "Error: cannot read file %s\n", filename); + fclose(fp); + exit(1); + } + } + fclose(fp); + + json_str[json_file_size] = '\0'; + + return json_str; +} + +// Function to print the properties array +int +parseProperties(cJSON *properties, MD_JSON_ARGS *md_json_args) +{ + int num_properties = cJSON_GetArraySize(properties); + cJSON *property = NULL; + cJSON_ArrayForEach(property, properties) + { 
+ cJSON *name = cJSON_GetObjectItemCaseSensitive(property, "name"); + cJSON *value = cJSON_GetObjectItemCaseSensitive(property, "value"); + cJSON *class = cJSON_GetObjectItemCaseSensitive(property, "class"); + cJSON *type = cJSON_GetObjectItemCaseSensitive(property, "type"); + + md_json_processor->process_object_property(name, type, class, value, md_json_args); + } + return num_properties; +} + +// Function to traverse and print the JSON structure +void +parseJSON(const char *jsonString, void *args) +{ +#ifdef JMD_VERBOSE + stopwatch_t total_timer; + stopwatch_t obj_timer; +#endif + cJSON *json = cJSON_Parse(jsonString); + if (json == NULL) { + const char *error_ptr = cJSON_GetErrorPtr(); + if (error_ptr != NULL) { + fprintf(stderr, "Error before: %s\n", error_ptr); + } + goto end; + } + + MD_JSON_ARGS *md_json_args = (MD_JSON_ARGS *)args; + + cJSON *dataset_name = cJSON_GetObjectItemCaseSensitive(json, "dataset_name"); + cJSON *dataset_description = cJSON_GetObjectItemCaseSensitive(json, "dataset_description"); + cJSON *source_URL = cJSON_GetObjectItemCaseSensitive(json, "source_URL"); + cJSON *collector = cJSON_GetObjectItemCaseSensitive(json, "collector"); + cJSON *objects = cJSON_GetObjectItemCaseSensitive(json, "objects"); + + md_json_processor->process_json_header(dataset_name, dataset_description, source_URL, collector, + md_json_args); + int num_objects = cJSON_GetArraySize(objects); + +#ifdef JMD_VERBOSE + println("Start to import %d objects...\n", num_objects); + timer_start(&total_timer); +#endif + + cJSON *object = NULL; + cJSON_ArrayForEach(object, objects) + { +#ifdef JMD_VERBOSE + timer_start(&obj_timer); +#endif + cJSON *name = cJSON_GetObjectItemCaseSensitive(object, "name"); + cJSON *type = cJSON_GetObjectItemCaseSensitive(object, "type"); + cJSON *full_path = cJSON_GetObjectItemCaseSensitive(object, "full_path"); + cJSON *properties = cJSON_GetObjectItemCaseSensitive(object, "properties"); + + int object_creation_result = + 
md_json_processor->process_object_base(name, type, full_path, md_json_args); + if (object_creation_result != 0) { + println("Error: failed to create object %s\n", cJSON_GetStringValue(name)); + continue; + } + int num_properties = parseProperties(properties, md_json_args); + + md_json_args->total_prop_count += num_properties; +#ifdef JMD_VERBOSE + timer_pause(&obj_timer); + println(" Imported object %s with %d properties in %.4f ms.\n", cJSON_GetStringValue(name), + num_properties, timer_delta_ms(&obj_timer)); +#endif + } + md_json_args->total_obj_count += num_objects; +#ifdef JMD_VERBOSE + println("Imported %d objects in %.4f ms.\n", num_objects, timer_delta_ms(&total_timer)); + md_json_processor->complete_one_json_file(md_json_args); +#endif +end: + cJSON_Delete(json); +} + +int +is_meta_json(const struct dirent *entry) +{ + if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { + return 0; + } + if (entry->d_type == DT_DIR) { + return 1; + } + if (endsWith(entry->d_name, ".json")) { + return 1; + } + return 0; +} + +int +scan_single_meta_json_file(char *full_filepath, void *args) +{ + MD_JSON_ARGS *md_json_args = (MD_JSON_ARGS *)args; + + if (md_json_args->current_file_count % md_json_args->mpi_size != md_json_args->mpi_rank) { + goto done; + } + + char *jsonString = read_json_file(full_filepath, md_json_args); + if (jsonString == NULL) { + return EXIT_FAILURE; + } + + parseJSON(jsonString, args); + free(jsonString); + + md_json_args->processed_file_count += 1; +done: + md_json_args->current_file_count += 1; + return 0; +} + +int +on_file(struct dirent *f_entry, const char *parent_path, void *arg) +{ + char *filepath = (char *)calloc(512, sizeof(char)); + sprintf(filepath, "%s/%s", parent_path, f_entry->d_name); + scan_single_meta_json_file(filepath, arg); + free(filepath); + return 0; +} + +int +on_dir(struct dirent *d_entry, const char *parent_path, void *arg) +{ + // char *dirpath = (char *)calloc(512, sizeof(char)); + // sprintf(dirpath, 
"%s/%s", parent_path, d_entry->d_name); + // Nothing to do here currently. + return 0; +} + +int +scan_files_in_dir(char *path, const int topk, void *args) +{ + collect_dir(path, is_meta_json, alphasort, ASC, topk, on_file, on_dir, args, NULL, NULL); + return 0; +} + +int +main(int argc, char **argv) +{ + int rst; + int rank = 0, size = 1; + + double stime, duration; + uint64_t total_obj_count = 0; + uint64_t total_prop_count = 0; + int num_files = 0; + +#ifdef ENABLE_MPI + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); +#else + stopwatch_t global_timer; +#endif + + // check the current working directory + char cwd[768]; + if (getcwd(cwd, sizeof(cwd)) != NULL) { + // println("Current working dir: %s\n", cwd); + } + else { + perror("getcwd() error"); + return 1; + } + + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + const char *INPUT_DIR = argv[1]; + int topk = atoi(argv[2]); + char full_filepath[1024]; + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + stime = MPI_Wtime(); +#else + timer_start(&global_timer); +#endif + + initilize_md_json_processor(); + MD_JSON_ARGS *md_json_args = (MD_JSON_ARGS *)malloc(sizeof(MD_JSON_ARGS)); + // we initialize PDC in the function below + if (md_json_processor->init_processor(md_json_args) < 0) { + println("Error: failed to initialize the JSON processor.\n"); + return EXIT_FAILURE; + } + + // now we need to make sure we pass this as one of the arguments to the scan function. + md_json_args->current_file_count = 0; + md_json_args->processed_file_count = 0; + md_json_args->mpi_size = size; + md_json_args->mpi_rank = rank; + // Note: in the above, the scanner args goes to loader_args. The JSON processor args goes to arg1. 
+ + if (is_regular_file(INPUT_DIR)) { + scan_single_meta_json_file((char *)INPUT_DIR, md_json_args); + rst = 0; + } + else { + rst = scan_files_in_dir((char *)INPUT_DIR, topk, md_json_args); + } + +#ifdef ENABLE_MPI + MPI_Barrier(MPI_COMM_WORLD); + duration = MPI_Wtime() - stime; + MPI_Reduce(&(md_json_args->total_obj_count), &total_obj_count, 1, MPI_UINT64_T, MPI_SUM, 0, + MPI_COMM_WORLD); + MPI_Reduce(&(md_json_args->total_prop_count), &total_prop_count, 1, MPI_UINT64_T, MPI_SUM, 0, + MPI_COMM_WORLD); + MPI_Reduce(&(md_json_args->processed_file_count), &num_files, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); +#else + timer_pause(&global_timer); + duration = timer_delta_ms(&global_timer) / 1000.0; + total_obj_count = md_json_args->total_obj_count; + total_prop_count = md_json_args->total_prop_count; + num_files = md_json_args->processed_file_count; +#endif + + if (rank == 0) { + println("Processed %d files, imported %" PRIu64 " objects and %" PRIu64 + " attributes. Total duration: %.4f seconds.\n", + num_files, total_obj_count, total_prop_count, duration); + } + + md_json_processor->finalize_processor(md_json_args); + +#ifdef ENABLE_MPI + MPI_Finalize(); +#endif + + return EXIT_SUCCESS; +} \ No newline at end of file