diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ac8bd21a1..b6c358e08 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -107,7 +107,7 @@ stages: script: - cd unifyfs-build/t && $JOB_LAUNCH_COMMAND make check after_script: - - rm -rf /tmp/unify* /tmp/tmp.* /tmp/mdhim* /tmp/na_sm | true + - rm -rf /tmp/unify* /tmp/tmp.* /tmp/na_sm | true # Run the integration test suite with the options provided from the specific # job. @@ -156,7 +156,9 @@ before_script: # Running with lsf tag here, but this may be too vague as the job may attempt to # use a runner on a system the service user doesn't have permission to access. # If so, move this job to a specific system. -full_clean: +# TODO: this fails when the rm tries to run on the path this job is running +# from. Rewrite to exclude current runner. +.full_clean: stage: clean extends: .base-template variables: @@ -166,12 +168,11 @@ full_clean: tags: - shell before_script: [] - script: rm -rf ${HOME}/.jacamar-ci/* + script: rm -rf ${WORKSPACE}/.jacamar-ci/* needs: [] # System specific jobs include: - local: .gitlab/ascent.yml - - local: .gitlab/catalyst.yml - local: .gitlab/lassen.yml - local: .gitlab/quartz.yml diff --git a/.gitlab/catalyst.yml b/.gitlab/catalyst.yml deleted file mode 100644 index 8ac467b6f..000000000 --- a/.gitlab/catalyst.yml +++ /dev/null @@ -1,175 +0,0 @@ -# Catalyst Templates - -# The RUN_CATALYST variable can be toggled in the Gitlab interface to -# toggle whether jobs should be run on this system. 
-.catalyst-template: - extends: .base-template - rules: - - if: '$RUN_CATALYST != "ON"' - when: never - - if: '$CI_PIPELINE_SOURCE == "schedule"' - when: never - - when: on_success - -.catalyst-scheduled-template: - extends: .base-template - rules: - - if: '$RUN_CATALYST != "ON"' - when: never - - if: '$CI_PIPELINE_SOURCE == "schedule"' - -.catalyst-shell-template: - extends: .catalyst-template - tags: - - catalyst - - shell - -.catalyst-batch-template: - extends: .catalyst-template - tags: - - catalyst - - batch - -.catalyst-scheduled-shell-template: - extends: .catalyst-scheduled-template - tags: - - catalyst - - shell - -.catalyst-scheduled-batch-template: - extends: .catalyst-scheduled-template - tags: - - catalyst - - batch - -##### All Catalyst Jobs ##### - -### gcc@4.9.3 ### -.catalyst-gcc-4_9_3-template: - variables: - COMPILER: gcc/4.9.3 - CC_COMMAND: "which gcc" - FC_COMMAND: "which gfortran" - SPACK_ENV_NAME: "unifyfs-slurm-gcc4_9_3" - -catalyst-gcc-4_9_3-install-deps: - extends: [.catalyst-shell-template, .catalyst-gcc-4_9_3-template, .install-deps-template] - -catalyst-gcc-4_9_3-build: - extends: [.catalyst-shell-template, .catalyst-gcc-4_9_3-template, .build-template] - needs: ["catalyst-gcc-4_9_3-install-deps"] - -catalyst-gcc-4_9_3-unit-test: - extends: [.slurm-single-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .unit-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - -# Integration tests - -catalyst-gcc-4_9_3-integ-test-writeread-posix: - variables: - CI_TEST_OPTIONS: ":: -s writeread -t posix" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - -catalyst-gcc-4_9_3-integ-test-writeread-mpiio: - variables: - CI_TEST_OPTIONS: ":: -s writeread -t mpiio" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - 
-catalyst-gcc-4_9_3-integ-test-read-posix: - variables: - CI_TEST_OPTIONS: ":: -s read -t posix" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - -catalyst-gcc-4_9_3-integ-test-read-mpiio: - variables: - CI_TEST_OPTIONS: ":: -s read -t mpiio" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - -catalyst-gcc-4_9_3-integ-test-pc-all: - variables: - CI_TEST_OPTIONS: ":: -s pc -t all" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - -catalyst-gcc-4_9_3-integ-test-stage: - variables: - CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-4_9_3-template, .integ-test-template] - needs: ["catalyst-gcc-4_9_3-build"] - - -### gcc@10.2.1 ### -.catalyst-gcc-10_2_1-template: - variables: - COMPILER: gcc/10.2.1 - CC_COMMAND: "which gcc" - FC_COMMAND: "which gfortran" - SPACK_ENV_NAME: "unifyfs-slurm-gcc10_2_1" - -catalyst-gcc-10_2_1-install-deps: - extends: [.catalyst-shell-template, .catalyst-gcc-10_2_1-template, .install-deps-template] - -catalyst-gcc-10_2_1-build: - extends: [.catalyst-shell-template, .catalyst-gcc-10_2_1-template, .build-template] - needs: ["catalyst-gcc-10_2_1-install-deps"] - -catalyst-gcc-10_2_1-unit-test: - extends: [.slurm-single-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .unit-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -# Integration tests - -catalyst-gcc-10_2_1-integ-test-writeread-posix: - variables: - CI_TEST_OPTIONS: ":: -s writeread -t posix" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - 
-catalyst-gcc-10_2_1-integ-test-writeread-mpiio: - variables: - CI_TEST_OPTIONS: ":: -s writeread -t mpiio" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -catalyst-gcc-10_2_1-integ-test-read-posix: - variables: - CI_TEST_OPTIONS: ":: -s read -t posix" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -catalyst-gcc-10_2_1-integ-test-read-mpiio: - variables: - CI_TEST_OPTIONS: ":: -s read -t mpiio" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -catalyst-gcc-10_2_1-integ-test-pc-all: - variables: - CI_TEST_OPTIONS: ":: -s pc -t all" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -catalyst-gcc-10_2_1-integ-test-stage: - variables: - CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .catalyst-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-build"] - -# Larger Scheduled Stage Job(s) - -catalyst-gcc-10_2_1-scheduled-install-deps: - extends: [.catalyst-scheduled-shell-template, .catalyst-gcc-10_2_1-template, .install-deps-template] - -catalyst-gcc-10_2_1-scheduled-build: - extends: [.catalyst-scheduled-shell-template, .catalyst-gcc-10_2_1-template, .build-template] - needs: ["catalyst-gcc-10_2_1-scheduled-install-deps"] - -catalyst-gcc-10_2_1-integ-test-scheduled-stage: - variables: - CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .catalyst-scheduled-batch-template, .catalyst-gcc-10_2_1-template, .integ-test-template] - needs: ["catalyst-gcc-10_2_1-scheduled-build"] diff --git a/.gitlab/lassen.yml b/.gitlab/lassen.yml index 
ea88e384e..791469b9a 100644 --- a/.gitlab/lassen.yml +++ b/.gitlab/lassen.yml @@ -173,3 +173,76 @@ lassen-gcc-8_3_1-integ-test-scheduled-stage: CI_TEST_OPTIONS: ":: -s stage" extends: [.lsf-multi-node-template, .lassen-scheduled-batch-template, .lassen-gcc-8_3_1-template, .integ-test-template] needs: ["lassen-gcc-8_3_1-scheduled-build"] + + +### gcc@11.2.1 ### +.lassen-gcc-11_2_1-template: + variables: + COMPILER: gcc/11.2.1 + CC_COMMAND: "which gcc" + FC_COMMAND: "which gfortran" + SPACK_ENV_NAME: "unifyfs-lsf-gcc11_2_1" + +lassen-gcc-11_2_1-install-deps: + extends: [.lassen-shell-template, .lassen-gcc-11_2_1-template, .install-deps-template] + +lassen-gcc-11_2_1-build: + extends: [.lassen-shell-template, .lassen-gcc-11_2_1-template, .build-template] + needs: ["lassen-gcc-11_2_1-install-deps"] + +lassen-gcc-11_2_1-unit-test: + extends: [.lsf-single-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .unit-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +# Integration tests + +lassen-gcc-11_2_1-integ-test-writeread-posix: + variables: + CI_TEST_OPTIONS: ":: -s writeread -t posix" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +lassen-gcc-11_2_1-integ-test-writeread-mpiio: + variables: + CI_TEST_OPTIONS: ":: -s writeread -t mpiio" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +lassen-gcc-11_2_1-integ-test-read-posix: + variables: + CI_TEST_OPTIONS: ":: -s read -t posix" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +lassen-gcc-11_2_1-integ-test-read-mpiio: + variables: + CI_TEST_OPTIONS: ":: -s read -t mpiio" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: 
["lassen-gcc-11_2_1-build"] + +lassen-gcc-11_2_1-integ-test-pc-all: + variables: + CI_TEST_OPTIONS: ":: -s pc -t all" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +lassen-gcc-11_2_1-integ-test-stage: + variables: + CI_TEST_OPTIONS: ":: -s stage" + extends: [.lsf-multi-node-template, .lassen-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-build"] + +# Larger Scheduled Stage Job(s) + +lassen-gcc-11_2_1-scheduled-install-deps: + extends: [.lassen-scheduled-shell-template, .lassen-gcc-11_2_1-template, .install-deps-template] + +lassen-gcc-11_2_1-scheduled-build: + extends: [.lassen-scheduled-shell-template, .lassen-gcc-11_2_1-template, .build-template] + needs: ["lassen-gcc-11_2_1-scheduled-install-deps"] + +lassen-gcc-11_2_1-integ-test-scheduled-stage: + variables: + CI_TEST_OPTIONS: ":: -s stage" + extends: [.lsf-multi-node-template, .lassen-scheduled-batch-template, .lassen-gcc-11_2_1-template, .integ-test-template] + needs: ["lassen-gcc-11_2_1-scheduled-build"] diff --git a/.gitlab/quartz.yml b/.gitlab/quartz.yml index 68af7a780..de2bdef02 100644 --- a/.gitlab/quartz.yml +++ b/.gitlab/quartz.yml @@ -44,132 +44,132 @@ ##### All Quartz Jobs ##### -### gcc@4.9.3 ### -.quartz-gcc-4_9_3-template: +### gcc@10.3.1 ### +.quartz-gcc-10_3_1-template: variables: - COMPILER: gcc/4.9.3 + COMPILER: gcc/10.3.1 CC_COMMAND: "which gcc" FC_COMMAND: "which gfortran" - SPACK_ENV_NAME: "unifyfs-slurm-gcc4_9_3" + SPACK_ENV_NAME: "unifyfs-slurm-gcc10_3_1" -quartz-gcc-4_9_3-install-deps: - extends: [.quartz-shell-template, .quartz-gcc-4_9_3-template, .install-deps-template] +quartz-gcc-10_3_1-install-deps: + extends: [.quartz-shell-template, .quartz-gcc-10_3_1-template, .install-deps-template] -quartz-gcc-4_9_3-build: - extends: [.quartz-shell-template, .quartz-gcc-4_9_3-template, .build-template] - needs: 
["quartz-gcc-4_9_3-install-deps"] +quartz-gcc-10_3_1-build: + extends: [.quartz-shell-template, .quartz-gcc-10_3_1-template, .build-template] + needs: ["quartz-gcc-10_3_1-install-deps"] -quartz-gcc-4_9_3-unit-test: - extends: [.slurm-single-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .unit-test-template] - needs: ["quartz-gcc-4_9_3-build"] +quartz-gcc-10_3_1-unit-test: + extends: [.slurm-single-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .unit-test-template] + needs: ["quartz-gcc-10_3_1-build"] # Integration tests -quartz-gcc-4_9_3-integ-test-writeread-posix: +quartz-gcc-10_3_1-integ-test-writeread-posix: variables: CI_TEST_OPTIONS: ":: -s writeread -t posix" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -quartz-gcc-4_9_3-integ-test-writeread-mpiio: +quartz-gcc-10_3_1-integ-test-writeread-mpiio: variables: CI_TEST_OPTIONS: ":: -s writeread -t mpiio" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -quartz-gcc-4_9_3-integ-test-read-posix: +quartz-gcc-10_3_1-integ-test-read-posix: variables: CI_TEST_OPTIONS: ":: -s read -t posix" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -quartz-gcc-4_9_3-integ-test-read-mpiio: +quartz-gcc-10_3_1-integ-test-read-mpiio: variables: 
CI_TEST_OPTIONS: ":: -s read -t mpiio" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -quartz-gcc-4_9_3-integ-test-pc-all: +quartz-gcc-10_3_1-integ-test-pc-all: variables: CI_TEST_OPTIONS: ":: -s pc -t all" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -quartz-gcc-4_9_3-integ-test-stage: +quartz-gcc-10_3_1-integ-test-stage: variables: CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-4_9_3-template, .integ-test-template] - needs: ["quartz-gcc-4_9_3-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_3_1-template, .integ-test-template] + needs: ["quartz-gcc-10_3_1-build"] -### gcc@10.2.1 ### -.quartz-gcc-10_2_1-template: +### gcc@12.1.1 ### +.quartz-gcc-12_1_1-template: variables: - COMPILER: gcc/10.2.1 + COMPILER: gcc/12.1.1 CC_COMMAND: "which gcc" FC_COMMAND: "which gfortran" - SPACK_ENV_NAME: "unifyfs-slurm-gcc10_2_1" + SPACK_ENV_NAME: "unifyfs-slurm-gcc12_1_1" -quartz-gcc-10_2_1-install-deps: - extends: [.quartz-shell-template, .quartz-gcc-10_2_1-template, .install-deps-template] +quartz-gcc-12_1_1-install-deps: + extends: [.quartz-shell-template, .quartz-gcc-12_1_1-template, .install-deps-template] -quartz-gcc-10_2_1-build: - extends: [.quartz-shell-template, .quartz-gcc-10_2_1-template, .build-template] - needs: ["quartz-gcc-10_2_1-install-deps"] +quartz-gcc-12_1_1-build: + extends: [.quartz-shell-template, .quartz-gcc-12_1_1-template, .build-template] + needs: 
["quartz-gcc-12_1_1-install-deps"] -quartz-gcc-10_2_1-unit-test: - extends: [.slurm-single-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .unit-test-template] - needs: ["quartz-gcc-10_2_1-build"] +quartz-gcc-12_1_1-unit-test: + extends: [.slurm-single-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .unit-test-template] + needs: ["quartz-gcc-12_1_1-build"] # Integration tests -quartz-gcc-10_2_1-integ-test-writeread-posix: +quartz-gcc-12_1_1-integ-test-writeread-posix: variables: CI_TEST_OPTIONS: ":: -s writeread -t posix" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] -quartz-gcc-10_2_1-integ-test-writeread-mpiio: +quartz-gcc-12_1_1-integ-test-writeread-mpiio: variables: CI_TEST_OPTIONS: ":: -s writeread -t mpiio" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] -quartz-gcc-10_2_1-integ-test-read-posix: +quartz-gcc-12_1_1-integ-test-read-posix: variables: CI_TEST_OPTIONS: ":: -s read -t posix" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] -quartz-gcc-10_2_1-integ-test-read-mpiio: +quartz-gcc-12_1_1-integ-test-read-mpiio: variables: CI_TEST_OPTIONS: ":: -s read -t mpiio" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, 
.integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] -quartz-gcc-10_2_1-integ-test-pc-all: +quartz-gcc-12_1_1-integ-test-pc-all: variables: CI_TEST_OPTIONS: ":: -s pc -t all" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] -quartz-gcc-10_2_1-integ-test-stage: +quartz-gcc-12_1_1-integ-test-stage: variables: CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - needs: ["quartz-gcc-10_2_1-build"] + extends: [.slurm-multi-node-template, .quartz-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-build"] # Larger Scheduled Stage Job(s) -quartz-gcc-10_2_1-scheduled-install-deps: - extends: [.quartz-scheduled-shell-template, .quartz-gcc-10_2_1-template, .install-deps-template] +quartz-gcc-12_1_1-scheduled-install-deps: + extends: [.quartz-scheduled-shell-template, .quartz-gcc-12_1_1-template, .install-deps-template] -quartz-gcc-10_2_1-scheduled-build: - extends: [.quartz-scheduled-shell-template, .quartz-gcc-10_2_1-template, .build-template] - needs: ["quartz-gcc-10_2_1-scheduled-install-deps"] +quartz-gcc-12_1_1-scheduled-build: + extends: [.quartz-scheduled-shell-template, .quartz-gcc-12_1_1-template, .build-template] + needs: ["quartz-gcc-12_1_1-scheduled-install-deps"] -quartz-gcc-10_2_1-integ-test-scheduled-stage: +quartz-gcc-12_1_1-integ-test-scheduled-stage: variables: CI_TEST_OPTIONS: ":: -s stage" - extends: [.slurm-multi-node-template, .quartz-scheduled-batch-template, .quartz-gcc-10_2_1-template, .integ-test-template] - 
needs: ["quartz-gcc-10_2_1-scheduled-build"] + extends: [.slurm-multi-node-template, .quartz-scheduled-batch-template, .quartz-gcc-12_1_1-template, .integ-test-template] + needs: ["quartz-gcc-12_1_1-scheduled-build"] diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..7e676ea8d --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,51 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +formats: + - pdf + - epub + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + jobs: + post_checkout: + # Cancel building pull requests when there aren't changes in the docs directory or YAML file. + - | + if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git diff --quiet origin/dev -- docs/ .readthedocs.yaml; + then + exit 183; + fi + pre_install: + # pip-compile was used to generate the requirements.txt file to enable + # reproducible builds. + # + # If adding or updating top-level dependencies, update requirements.in and + # then overwrite existing requirements.txt with results of `pip-compile`. + # + # If simply wanting to update transitive dependencies to pin a new + # reproducible build, then overwrite existing requirements.txt with + # results of `pip-compile`. 
+ # + # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html#pin-your-transitive-dependencies + - python -m pip install pip-tools + - pip-compile --strip-extras -o docs/new_requirements.txt docs/requirements.in --upgrade + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + fail_on_warning: true + +# We recommend specifying your dependencies to enable reproducible builds: +# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + # If needing to update for pinning a new reproducible build, then overwrite with + # results of `pip-compile` command above. + - requirements: docs/requirements.txt diff --git a/.spack-env/unifyfs-lsf-gcc11_2_1/spack.yaml b/.spack-env/unifyfs-lsf-gcc11_2_1/spack.yaml new file mode 100644 index 000000000..7a35fb182 --- /dev/null +++ b/.spack-env/unifyfs-lsf-gcc11_2_1/spack.yaml @@ -0,0 +1,15 @@ +spack: + packages: + all: + compiler: [gcc@11.2.1] + providers: + mpi: [spectrum-mpi] + target: [ppc64le] + specs: + - automake@1.15.1 + - gotcha@1.0.5 + - mochi-margo@0.13.1 ^mercury~boostsys ^libfabric fabrics=rxm,sockets,tcp + - spath~mpi + view: true + concretizer: + unify: true diff --git a/.spack-env/unifyfs-lsf-gcc4_9_3/spack.yaml b/.spack-env/unifyfs-lsf-gcc4_9_3/spack.yaml index 0e039407d..4b74e3f8b 100644 --- a/.spack-env/unifyfs-lsf-gcc4_9_3/spack.yaml +++ b/.spack-env/unifyfs-lsf-gcc4_9_3/spack.yaml @@ -7,8 +7,8 @@ spack: target: [ppc64le] specs: - automake@1.15.1 - - gotcha@develop - - mochi-margo@0.9.6 cflags="-std=gnu99" ^libfabric fabrics=rxm,sockets,tcp + - gotcha@1.0.5 cflags="-std=gnu99" + - mochi-margo@0.13.1 cflags="-std=gnu99" ^mercury~boostsys ^libfabric fabrics=rxm,sockets,tcp - spath~mpi view: true concretizer: diff --git a/.spack-env/unifyfs-lsf-gcc8_3_1/spack.yaml b/.spack-env/unifyfs-lsf-gcc8_3_1/spack.yaml index b95ff4556..d7c0dadb2 100644 --- a/.spack-env/unifyfs-lsf-gcc8_3_1/spack.yaml +++ 
b/.spack-env/unifyfs-lsf-gcc8_3_1/spack.yaml @@ -7,8 +7,8 @@ spack: target: [ppc64le] specs: - automake@1.15.1 - - gotcha@develop - - mochi-margo@0.9.6 ^libfabric fabrics=rxm,sockets,tcp + - gotcha@1.0.5 + - mochi-margo@0.13.1 ^mercury~boostsys ^libfabric fabrics=rxm,sockets,tcp - spath~mpi view: true concretizer: diff --git a/.spack-env/unifyfs-slurm-gcc10_2_1/spack.yaml b/.spack-env/unifyfs-slurm-gcc10_2_1/spack.yaml deleted file mode 100644 index 71f027c02..000000000 --- a/.spack-env/unifyfs-slurm-gcc10_2_1/spack.yaml +++ /dev/null @@ -1,15 +0,0 @@ -spack: - packages: - all: - compiler: [gcc@10.2.1] - providers: - mpi: [mvapich2] - target: [x86_64] - specs: - - automake@1.15.1 - - gotcha@develop - - mochi-margo@0.9.6 ^libfabric fabrics=rxm,sockets,tcp - - spath~mpi - view: true - concretizer: - unify: true diff --git a/.spack-env/unifyfs-slurm-gcc10_3_1/spack.yaml b/.spack-env/unifyfs-slurm-gcc10_3_1/spack.yaml new file mode 100644 index 000000000..a5a5b273d --- /dev/null +++ b/.spack-env/unifyfs-slurm-gcc10_3_1/spack.yaml @@ -0,0 +1,14 @@ +spack: + packages: + all: + compiler: [gcc@10.3.1] + providers: + mpi: [mvapich2] + target: [x86_64_v3] + specs: + - gotcha@1.0.5 + - mochi-margo@0.13.1 ^mercury~boostsys ^libfabric fabrics=rxm,sockets,tcp + - spath~mpi + view: true + concretizer: + unify: true diff --git a/.spack-env/unifyfs-slurm-gcc12_1_1/spack.yaml b/.spack-env/unifyfs-slurm-gcc12_1_1/spack.yaml new file mode 100644 index 000000000..528a282de --- /dev/null +++ b/.spack-env/unifyfs-slurm-gcc12_1_1/spack.yaml @@ -0,0 +1,14 @@ +spack: + packages: + all: + compiler: [gcc@12.1.1] + providers: + mpi: [mvapich2] + target: [x86_64_v3] + specs: + - gotcha@1.0.5 + - mochi-margo@0.13.1 ^mercury~boostsys ^libfabric fabrics=rxm,sockets,tcp + - spath~mpi + view: true + concretizer: + unify: true diff --git a/.spack-env/unifyfs-slurm-gcc4_9_3/spack.yaml b/.spack-env/unifyfs-slurm-gcc4_9_3/spack.yaml deleted file mode 100644 index 242658840..000000000 --- 
a/.spack-env/unifyfs-slurm-gcc4_9_3/spack.yaml +++ /dev/null @@ -1,15 +0,0 @@ -spack: - packages: - all: - compiler: [gcc@4.9.3] - providers: - mpi: [mvapich2] - target: [x86_64] - specs: - - automake@1.15.1 - - gotcha@develop - - mochi-margo@0.9.6 cflags="-std=gnu99" ^libfabric fabrics=rxm,sockets,tcp - - spath~mpi - view: true - concretizer: - unify: true diff --git a/Makefile.am b/Makefile.am index 99e3451a6..8eb00afe2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = extras meta client server util examples t +SUBDIRS = extras client server util examples t CONFIG = ordered diff --git a/README.md b/README.md index 40686898e..4bce48aba 100644 --- a/README.md +++ b/README.md @@ -29,5 +29,10 @@ Status of UnifyFS development branch (dev): [![Read the Docs](https://readthedocs.org/projects/unifyfs/badge/?version=dev)](https://unifyfs.readthedocs.io) +## UnifyFS Citation +We recommend that you use this citation for UnifyFS: + + * Michael Brim, Adam Moody, Seung-Hwan Lim, Ross Miller, Swen Boehm, Cameron Stanavige, Kathryn Mohror, Sarp Oral, “UnifyFS: A User-level Shared File System for Unified Access to Distributed Local Storage,” 37th IEEE International Parallel & Distributed Processing Symposium (IPDPS 2023), St. Petersburg, FL, May 2023. + ## Contribute and Develop If you would like to help, please see our [contributing guidelines](https://unifyfs.readthedocs.io/en/dev/contribute-ways.html). 
diff --git a/client/check_fns/unifyfs_list.txt b/client/check_fns/unifyfs_list.txt index 83693605a..2e143f51b 100644 --- a/client/check_fns/unifyfs_list.txt +++ b/client/check_fns/unifyfs_list.txt @@ -8,9 +8,15 @@ int UNIFYFS_WRAP(truncate)(const char* path, off_t length) int UNIFYFS_WRAP(unlink)(const char *path) int UNIFYFS_WRAP(remove)(const char *path) int UNIFYFS_WRAP(stat)(const char *path, struct stat *buf) +int UNIFYFS_WRAP(stat64)(const char *path, struct stat64 *buf) int UNIFYFS_WRAP(fstat)(int fd, struct stat *buf) +int UNIFYFS_WRAP(fstat64)(int fd, struct stat64 *buf) int UNIFYFS_WRAP(__xstat)(int vers, const char *path, struct stat *buf) +int UNIFYFS_WRAP(__xstat64)(int vers, const char *path, struct stat64 *buf) +int UNIFYFS_WRAP(__fxstat)(int vers, int fd, struct stat *buf) +int UNIFYFS_WRAP(__fxstat64)(int vers, int fd, struct stat64 *buf) int UNIFYFS_WRAP(__lxstat)(int vers, const char *path, struct stat *buf) +int UNIFYFS_WRAP(__lxstat64)(int vers, const char *path, struct stat64 *buf) int UNIFYFS_WRAP(creat)(const char* path, mode_t mode) int UNIFYFS_WRAP(creat64)(const char* path, mode_t mode) int UNIFYFS_WRAP(open)(const char *path, int flags, ...) 
@@ -33,9 +39,9 @@ int UNIFYFS_WRAP(flock)(int fd, int operation) void* UNIFYFS_WRAP(mmap)(void *addr, size_t length, int prot, int flags, int fd, off_t offset) int UNIFYFS_WRAP(msync)(void *addr, size_t length, int flags) void* UNIFYFS_WRAP(mmap64)(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -int UNIFYFS_WRAP(__fxstat)(int vers, int fd, struct stat *buf) int UNIFYFS_WRAP(close)(int fd) FILE* UNIFYFS_WRAP(fopen)(const char *path, const char *mode) +FILE* UNIFYFS_WRAP(fopen64)(const char *path, const char *mode) FILE* UNIFYFS_WRAP(freopen)(const char *path, const char *mode, FILE *stream) int UNIFYFS_WRAP(setvbuf)(FILE* stream, char* buf, int type, size_t size) void* UNIFYFS_WRAP(setbuf)(FILE* stream, char* buf) diff --git a/client/src/gotcha_map_unifyfs_list.c b/client/src/gotcha_map_unifyfs_list.c index 44a34831f..338f814b3 100644 --- a/client/src/gotcha_map_unifyfs_list.c +++ b/client/src/gotcha_map_unifyfs_list.c @@ -72,18 +72,33 @@ UNIFYFS_DEF(futimens, int, UNIFYFS_DEF(stat, int, (const char* path, struct stat* buf), (path, buf)) +UNIFYFS_DEF(stat64, int, + (const char* path, struct stat64* buf), + (path, buf)) UNIFYFS_DEF(fstat, int, (int fd, struct stat* buf), (fd, buf)) +UNIFYFS_DEF(fstat64, int, + (int fd, struct stat64* buf), + (fd, buf)) UNIFYFS_DEF(__xstat, int, (int vers, const char* path, struct stat* buf), (vers, path, buf)) +UNIFYFS_DEF(__xstat64, int, + (int vers, const char* path, struct stat64* buf), + (vers, path, buf)) UNIFYFS_DEF(__fxstat, int, (int vers, int fd, struct stat* buf), (vers, fd, buf)) +UNIFYFS_DEF(__fxstat64, int, + (int vers, int fd, struct stat64* buf), + (vers, fd, buf)) UNIFYFS_DEF(__lxstat, int, (int vers, const char* path, struct stat* buf), (vers, path, buf)) +UNIFYFS_DEF(__lxstat64, int, + (int vers, const char* path, struct stat64* buf), + (vers, path, buf)) UNIFYFS_DEF(statfs, int, (const char* path, struct statfs* fsbuf), (path, fsbuf)) @@ -222,6 +237,9 @@ UNIFYFS_DEF(seekdir, void, 
UNIFYFS_DEF(fopen, FILE*, (const char* path, const char* mode), (path, mode)) +UNIFYFS_DEF(fopen64, FILE*, + (const char* path, const char* mode), + (path, mode)) UNIFYFS_DEF(freopen, FILE*, (const char* path, const char* mode, FILE* stream), (path, mode, stream)) @@ -364,11 +382,18 @@ struct gotcha_binding_t unifyfs_wrappers[] = { { "remove", UNIFYFS_WRAP(remove), &wrappee_handle_remove }, { "utimensat", UNIFYFS_WRAP(utimensat), &wrappee_handle_utimensat }, { "futimens", UNIFYFS_WRAP(futimens), &wrappee_handle_futimens }, + { "stat", UNIFYFS_WRAP(stat), &wrappee_handle_stat }, + { "stat64", UNIFYFS_WRAP(stat64), &wrappee_handle_stat64 }, { "fstat", UNIFYFS_WRAP(fstat), &wrappee_handle_fstat }, + { "fstat64", UNIFYFS_WRAP(fstat64), &wrappee_handle_fstat64 }, { "__xstat", UNIFYFS_WRAP(__xstat), &wrappee_handle___xstat }, + { "__xstat64", UNIFYFS_WRAP(__xstat64), &wrappee_handle___xstat64 }, { "__fxstat", UNIFYFS_WRAP(__fxstat), &wrappee_handle___fxstat }, + { "__fxstat64", UNIFYFS_WRAP(__fxstat64), &wrappee_handle___fxstat64 }, { "__lxstat", UNIFYFS_WRAP(__lxstat), &wrappee_handle___lxstat }, + { "__lxstat64", UNIFYFS_WRAP(__lxstat64), &wrappee_handle___lxstat64 }, + { "statfs", UNIFYFS_WRAP(statfs), &wrappee_handle_statfs }, { "fstatfs", UNIFYFS_WRAP(fstatfs), &wrappee_handle_fstatfs }, { "creat", UNIFYFS_WRAP(creat), &wrappee_handle_creat }, @@ -413,6 +438,7 @@ struct gotcha_binding_t unifyfs_wrappers[] = { { "scandir", UNIFYFS_WRAP(scandir), &wrappee_handle_scandir }, { "seekdir", UNIFYFS_WRAP(seekdir), &wrappee_handle_seekdir }, { "fopen", UNIFYFS_WRAP(fopen), &wrappee_handle_fopen }, + { "fopen64", UNIFYFS_WRAP(fopen64), &wrappee_handle_fopen64 }, { "freopen", UNIFYFS_WRAP(freopen), &wrappee_handle_freopen }, { "setvbuf", UNIFYFS_WRAP(setvbuf), &wrappee_handle_setvbuf }, { "setbuf", UNIFYFS_WRAP(setbuf), &wrappee_handle_setbuf }, diff --git a/client/src/margo_client.c b/client/src/margo_client.c index 06184e9d0..dda8db5d4 100644 --- a/client/src/margo_client.c 
+++ b/client/src/margo_client.c @@ -224,14 +224,14 @@ static hg_handle_t create_handle(hg_id_t id) return handle; } -static int forward_to_server( - hg_handle_t hdl, - void* input_ptr, - double timeout_msec) +static int forward_to_server(hg_handle_t hdl, + void* input_ptr, + double timeout_msec) { hg_return_t hret = margo_forward_timed(hdl, input_ptr, timeout_msec); if (hret != HG_SUCCESS) { LOGERR("margo_forward_timed() failed - %s", HG_Error_to_string(hret)); + //margo_state_dump(client_rpc_context->mid, "-", 0, NULL); return UNIFYFS_ERROR_MARGO; } return UNIFYFS_SUCCESS; diff --git a/client/src/pmpi_wrappers.c b/client/src/pmpi_wrappers.c index ebcfef2f8..eb1ceec2a 100644 --- a/client/src/pmpi_wrappers.c +++ b/client/src/pmpi_wrappers.c @@ -21,7 +21,7 @@ #include "unifyfs.h" #include "unifyfs_rc.h" -int unifyfs_mpi_init(int* argc, char*** argv) +int unifyfs_mpi_init(int* argc, char*** argv, int required, int* provided) { int rc, ret; int rank; @@ -29,7 +29,7 @@ int unifyfs_mpi_init(int* argc, char*** argv) //fprintf(stderr, "DEBUG: %s - before PMPI_Init()\n", __func__); - ret = PMPI_Init(argc, argv); + ret = PMPI_Init_thread(argc, argv, required, provided); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &world_sz); @@ -53,14 +53,32 @@ int unifyfs_mpi_init(int* argc, char*** argv) int MPI_Init(int* argc, char*** argv) { - return unifyfs_mpi_init(argc, argv); + int provided; + return unifyfs_mpi_init(argc, argv, MPI_THREAD_SINGLE, &provided); } void mpi_init_(MPI_Fint* ierr) { int argc = 0; char** argv = NULL; - int rc = unifyfs_mpi_init(&argc, &argv); + int provided; + int rc = unifyfs_mpi_init(&argc, &argv, MPI_THREAD_SINGLE, &provided); + + if (NULL != ierr) { + *ierr = (MPI_Fint)rc; + } +} + +int MPI_Init_thread(int* argc, char*** argv, int required, int* provided) +{ + return unifyfs_mpi_init(argc, argv, required, provided); +} + +void mpi_init_thread_(MPI_Fint* required, MPI_Fint* provided, MPI_Fint* ierr) +{ + int argc = 0; + char** 
argv = NULL; + int rc = unifyfs_mpi_init(&argc, &argv, *((int*)required), provided); if (NULL != ierr) { *ierr = (MPI_Fint)rc; @@ -71,11 +89,6 @@ int unifyfs_mpi_finalize(void) { int rc, ret; - rc = unifyfs_unmount(); - if (UNIFYFS_SUCCESS != rc) { - fprintf(stderr, "UNIFYFS ERROR: unifyfs_unmount() failed with '%s'\n", - unifyfs_rc_enum_description((unifyfs_rc)rc)); - } //fprintf(stderr, "DEBUG: %s - before PMPI_Finalize()\n", __func__); @@ -84,6 +97,12 @@ int unifyfs_mpi_finalize(void) //fprintf(stderr, "DEBUG: %s - after PMPI_Finalize(), ret=%d\n", // __func__, ret); + rc = unifyfs_unmount(); + if (UNIFYFS_SUCCESS != rc) { + fprintf(stderr, "UNIFYFS ERROR: unifyfs_unmount() failed with '%s'\n", + unifyfs_rc_enum_description((unifyfs_rc)rc)); + } + return ret; } diff --git a/client/src/pmpi_wrappers.h b/client/src/pmpi_wrappers.h index d2066dd59..52f11c4d3 100644 --- a/client/src/pmpi_wrappers.h +++ b/client/src/pmpi_wrappers.h @@ -18,9 +18,11 @@ #include /* MPI_Init PMPI wrapper */ -int unifyfs_mpi_init(int* argc, char*** argv); +int unifyfs_mpi_init(int* argc, char*** argv, int required, int* provided); int MPI_Init(int* argc, char*** argv); +int MPI_Init_thread(int* argc, char*** argv, int required, int* provided); void mpi_init_(MPI_Fint* ierr); +void mpi_init_thread_(MPI_Fint* required, MPI_Fint* provided, MPI_Fint* ierr); /* MPI_Finalize PMPI wrapper */ int unifyfs_mpi_finalize(void); diff --git a/client/src/unifyfs-stdio.c b/client/src/unifyfs-stdio.c index e401c6076..8b5db6c5e 100644 --- a/client/src/unifyfs-stdio.c +++ b/client/src/unifyfs-stdio.c @@ -271,6 +271,7 @@ static int unifyfs_fopen_parse_mode( */ static int unifyfs_fopen(const char* path, const char* mode, + int from_fopen64, FILE** outstream) { /* assume that we'll fail */ @@ -290,6 +291,8 @@ static int unifyfs_fopen(const char* path, int open_rc = -1; int fid; off_t pos; + /* if called from fopen64(), add O_LARGEFILE flag when creating file */ + int large_file_flag = from_fopen64 ? 
O_LARGEFILE : 0; if (read) { /* read shall fail if file does not already exist, unifyfs_fid_open * returns ENOENT if file does not exist w/o O_CREAT @@ -309,26 +312,26 @@ static int unifyfs_fopen(const char* path, * (read/write) */ open_rc = unifyfs_fid_open(posix_client, path, - O_RDWR | O_CREAT | O_TRUNC, - perms, &fid, &pos); + O_RDWR | O_CREAT | O_TRUNC | + large_file_flag, perms, &fid, &pos); } else { /* w ==> truncate to zero length or create file for writing */ open_rc = unifyfs_fid_open(posix_client, path, - O_WRONLY | O_CREAT | O_TRUNC, - perms, &fid, &pos); + O_WRONLY | O_CREAT | O_TRUNC | + large_file_flag, perms, &fid, &pos); } } else if (append) { /* force all writes to end of file when append is set */ if (plus) { /* a+ ==> append, open or create file for update, initial file * position for reading should be at start */ - open_rc = unifyfs_fid_open(posix_client, path, O_RDWR | O_CREAT, - perms, &fid, &pos); + open_rc = unifyfs_fid_open(posix_client, path, O_RDWR | O_CREAT | + large_file_flag, perms, &fid, &pos); } else { /* a ==> append, open or create file for writing, at end of file */ open_rc = unifyfs_fid_open(posix_client, path, - O_WRONLY | O_CREAT | O_APPEND, - perms, &fid, &pos); + O_WRONLY | O_CREAT | O_APPEND | + large_file_flag, perms, &fid, &pos); } } @@ -984,7 +987,7 @@ FILE* UNIFYFS_WRAP(fopen)(const char* path, const char* mode) char upath[UNIFYFS_MAX_FILENAME]; if (unifyfs_intercept_path(path, upath)) { FILE* stream; - int rc = unifyfs_fopen(upath, mode, &stream); + int rc = unifyfs_fopen(upath, mode, 0, &stream); if (rc != UNIFYFS_SUCCESS) { errno = unifyfs_rc_errno(rc); return NULL; @@ -998,6 +1001,26 @@ FILE* UNIFYFS_WRAP(fopen)(const char* path, const char* mode) } } +FILE* UNIFYFS_WRAP(fopen64)(const char* path, const char* mode) +{ + /* check whether we should intercept this path */ + char upath[UNIFYFS_MAX_FILENAME]; + if (unifyfs_intercept_path(path, upath)) { + FILE* stream; + int rc = unifyfs_fopen(upath, mode, 1, &stream); + 
if (rc != UNIFYFS_SUCCESS) { + errno = unifyfs_rc_errno(rc); + return NULL; + } + errno = 0; + return stream; + } else { + MAP_OR_FAIL(fopen64); + FILE* ret = UNIFYFS_REAL(fopen64)(path, mode); + return ret; + } +} + FILE* UNIFYFS_WRAP(freopen)(const char* path, const char* mode, FILE* stream) { /* check whether we should intercept this path */ @@ -3020,9 +3043,10 @@ __svfscanf(unifyfs_stream_t* fp, const char* fmt0, va_list ap) * considered part of the scanset. */ static const u_char* -__sccl(tab, fmt) -char* tab; -const u_char* fmt; +__sccl(char* tab, const u_char* fmt) +//__sccl(tab, fmt) +//char* tab; +//const u_char* fmt; { int c, n, v; diff --git a/client/src/unifyfs-stdio.h b/client/src/unifyfs-stdio.h index de455a27a..a49ca2cee 100644 --- a/client/src/unifyfs-stdio.h +++ b/client/src/unifyfs-stdio.h @@ -51,6 +51,7 @@ UNIFYFS_DECL(fclose, int, (FILE* stream)); UNIFYFS_DECL(fflush, int, (FILE* stream)); UNIFYFS_DECL(fopen, FILE*, (const char* path, const char* mode)); +UNIFYFS_DECL(fopen64, FILE*, (const char* path, const char* mode)); UNIFYFS_DECL(freopen, FILE*, (const char* path, const char* mode, FILE* stream)); UNIFYFS_DECL(setbuf, void, (FILE* stream, char* buf)); diff --git a/client/src/unifyfs-sysio.c b/client/src/unifyfs-sysio.c index 8fa130b07..77158b60d 100644 --- a/client/src/unifyfs-sysio.c +++ b/client/src/unifyfs-sysio.c @@ -783,13 +783,68 @@ static int __stat(const char* path, struct stat* buf) /* copy attributes to stat struct */ unifyfs_file_attr_to_stat(&fattr, buf); + + errno = 0; + return 0; +} + +#if defined(HAVE_STAT64) || \ + defined(HAVE_FSTAT64) || \ + defined(HAVE___XSTAT64) || \ + defined(HAVE___LXSTAT64) || \ + defined(HAVE___FXSTAT64) +static int __stat64(const char* path, struct stat64* buf) +{ + /* check that caller gave us a buffer to write to */ + if (!buf) { + /* forgot buffer for stat */ + LOGDBG("invalid stat buffer"); + errno = EINVAL; + return -1; + } + + /* flush any pending writes if needed */ + int fid = 
unifyfs_fid_from_path(posix_client, path); + if (fid != -1) { + int sync_rc = unifyfs_fid_sync_extents(posix_client, fid); + if (sync_rc != UNIFYFS_SUCCESS) { + errno = unifyfs_rc_errno(sync_rc); + return -1; + } + } + + /* clear the user buffer */ + memset(buf, 0, sizeof(*buf)); + + /* get global file id for given path */ + int gfid = unifyfs_generate_gfid(path); + + /* get stat information for file */ + unifyfs_file_attr_t fattr; + memset(&fattr, 0, sizeof(fattr)); + int ret = unifyfs_get_meta_with_size(gfid, &fattr); + if (ret != UNIFYFS_SUCCESS) { + errno = unifyfs_rc_errno(ret); + return -1; + } + + /* update local file metadata (if applicable) */ + if (fid != -1) { + unifyfs_fid_update_file_meta(posix_client, fid, &fattr); + } + + /* copy attributes to stat struct */ + unifyfs_file_attr_to_stat(&fattr, buf); + errno = 0; return 0; } +#endif int UNIFYFS_WRAP(stat)(const char* path, struct stat* buf) { LOGDBG("stat was called for %s", path); + char upath[UNIFYFS_MAX_FILENAME]; if (unifyfs_intercept_path(path, upath)) { int ret = __stat(upath, buf); @@ -801,13 +856,36 @@ int UNIFYFS_WRAP(stat)(const char* path, struct stat* buf) } } +#ifdef HAVE_STAT64 +int UNIFYFS_WRAP(stat64)(const char* path, struct stat64* buf) +{ + LOGDBG("stat64 was called for %s", path); + + char upath[UNIFYFS_MAX_FILENAME]; + if (unifyfs_intercept_path(path, upath)) { + int ret = __stat64(upath, buf); + return ret; + } else { + MAP_OR_FAIL(stat64); + int ret = UNIFYFS_REAL(stat64)(path, buf); + return ret; + } +} +#endif + int UNIFYFS_WRAP(fstat)(int fd, struct stat* buf) { LOGDBG("fstat was called for fd: %d", fd); /* check whether we should intercept this file descriptor */ if (unifyfs_intercept_fd(&fd)) { + /* check if the file is still active (e.g., not closed) */ int fid = unifyfs_get_fid_from_fd(fd); + if (fid == -1) { + errno = EBADF; + return -1; + } + const char* path = unifyfs_path_from_fid(posix_client, fid); int ret = __stat(path, buf); return ret; @@ -818,6 +896,31 @@ int 
UNIFYFS_WRAP(fstat)(int fd, struct stat* buf) } } +#ifdef HAVE_FSTAT64 +int UNIFYFS_WRAP(fstat64)(int fd, struct stat64* buf) +{ + LOGDBG("fstat64 was called for fd: %d", fd); + + /* check whether we should intercept this file descriptor */ + if (unifyfs_intercept_fd(&fd)) { + /* check if the file is still active (e.g., not closed) */ + int fid = unifyfs_get_fid_from_fd(fd); + if (fid == -1) { + errno = EBADF; + return -1; + } + + const char* path = unifyfs_path_from_fid(posix_client, fid); + int ret = __stat64(path, buf); + return ret; + } else { + MAP_OR_FAIL(fstat64); + int ret = UNIFYFS_REAL(fstat64)(fd, buf); + return ret; + } +} +#endif + /* * NOTE on __xstat(2), __lxstat(2), and __fxstat(2) * The additional parameter vers shall be 3 or the behavior of these functions @@ -858,6 +961,27 @@ int UNIFYFS_WRAP(__xstat)(int vers, const char* path, struct stat* buf) } #endif +#ifdef HAVE___XSTAT64 +int UNIFYFS_WRAP(__xstat64)(int vers, const char* path, struct stat64* buf) +{ + LOGDBG("xstat64 was called for %s", path); + + char upath[UNIFYFS_MAX_FILENAME]; + if (unifyfs_intercept_path(path, upath)) { + if (vers != _STAT_VER) { + errno = EINVAL; + return -1; + } + int ret = __stat64(upath, buf); + return ret; + } else { + MAP_OR_FAIL(__xstat64); + int ret = UNIFYFS_REAL(__xstat64)(vers, path, buf); + return ret; + } +} +#endif + #ifdef HAVE___LXSTAT int UNIFYFS_WRAP(__lxstat)(int vers, const char* path, struct stat* buf) { @@ -879,6 +1003,27 @@ int UNIFYFS_WRAP(__lxstat)(int vers, const char* path, struct stat* buf) } #endif +#ifdef HAVE___LXSTAT64 +int UNIFYFS_WRAP(__lxstat64)(int vers, const char* path, struct stat64* buf) +{ + LOGDBG("lxstat64 was called for %s", path); + + char upath[UNIFYFS_MAX_FILENAME]; + if (unifyfs_intercept_path(path, upath)) { + if (vers != _STAT_VER) { + errno = EINVAL; + return -1; + } + int ret = __stat64(upath, buf); + return ret; + } else { + MAP_OR_FAIL(__lxstat64); + int ret = UNIFYFS_REAL(__lxstat64)(vers, path, buf); + return 
ret; + } +} +#endif + #ifdef HAVE___FXSTAT int UNIFYFS_WRAP(__fxstat)(int vers, int fd, struct stat* buf) { @@ -892,6 +1037,11 @@ int UNIFYFS_WRAP(__fxstat)(int vers, int fd, struct stat* buf) } int fid = unifyfs_get_fid_from_fd(fd); + /* check if the file is still active (e.g., not closed) */ + if (fid == -1) { + errno = EBADF; + return -1; + } const char* path = unifyfs_path_from_fid(posix_client, fid); int ret = __stat(path, buf); return ret; @@ -903,6 +1053,35 @@ int UNIFYFS_WRAP(__fxstat)(int vers, int fd, struct stat* buf) } #endif +#ifdef HAVE___FXSTAT64 +int UNIFYFS_WRAP(__fxstat64)(int vers, int fd, struct stat64* buf) +{ + LOGDBG("fxstat64 was called for fd %d", fd); + + /* check whether we should intercept this file descriptor */ + if (unifyfs_intercept_fd(&fd)) { + if (vers != _STAT_VER) { + errno = EINVAL; + return -1; + } + + int fid = unifyfs_get_fid_from_fd(fd); + /* check if the file is still active (e.g., not closed) */ + if (fid == -1) { + errno = EBADF; + return -1; + } + const char* path = unifyfs_path_from_fid(posix_client, fid); + int ret = __stat64(path, buf); + return ret; + } else { + MAP_OR_FAIL(__fxstat64); + int ret = UNIFYFS_REAL(__fxstat64)(vers, fd, buf); + return ret; + } +} +#endif + #ifdef HAVE_SYS_STATFS_H diff --git a/client/src/unifyfs-sysio.h b/client/src/unifyfs-sysio.h index cc4174684..c078c76e7 100644 --- a/client/src/unifyfs-sysio.h +++ b/client/src/unifyfs-sysio.h @@ -58,12 +58,15 @@ /* file and directory operations */ UNIFYFS_DECL(access, int, (const char* pathname, int mode)); UNIFYFS_DECL(chmod, int, (const char* path, mode_t mode)); -UNIFYFS_DECL(__lxstat, int, (int vers, const char* path, struct stat* buf)); UNIFYFS_DECL(remove, int, (const char* path)); UNIFYFS_DECL(rename, int, (const char* oldpath, const char* newpath)); UNIFYFS_DECL(stat, int, (const char* path, struct stat* buf)); -UNIFYFS_DECL(statfs, int, (const char* path, struct statfs* fsbuf)); +UNIFYFS_DECL(stat64, int, (const char* path, struct stat64* buf)); UNIFYFS_DECL(__xstat, int, (int vers, const char* path, struct stat* buf)); +UNIFYFS_DECL(__xstat64, int, 
(int vers, const char* path, struct stat64* buf)); +UNIFYFS_DECL(__lxstat, int, (int vers, const char* path, struct stat* buf)); +UNIFYFS_DECL(__lxstat64, int, (int vers, const char* path, struct stat64* buf)); +UNIFYFS_DECL(statfs, int, (const char* path, struct statfs* fsbuf)); UNIFYFS_DECL(utimensat, int, (int dirfd, const char* pathname, const struct timespec times[2], int flags)); UNIFYFS_DECL(futimens, int, (int fd, const struct timespec times[2])); @@ -124,8 +127,10 @@ UNIFYFS_DECL(fchdir, int, (int fd)); UNIFYFS_DECL(fchmod, int, (int fd, mode_t mode)); UNIFYFS_DECL(flock, int, (int fd, int operation)); UNIFYFS_DECL(fstat, int, (int fd, struct stat* buf)); -UNIFYFS_DECL(fstatfs, int, (int fd, struct statfs* fsbuf)); +UNIFYFS_DECL(fstat64, int, (int fd, struct stat64* buf)); UNIFYFS_DECL(__fxstat, int, (int vers, int fd, struct stat* buf)); +UNIFYFS_DECL(__fxstat64, int, (int vers, int fd, struct stat64* buf)); +UNIFYFS_DECL(fstatfs, int, (int fd, struct statfs* fsbuf)); UNIFYFS_DECL(posix_fadvise, int, (int fd, off_t offset, off_t len, int advice)); /* diff --git a/client/src/unifyfs_api.c b/client/src/unifyfs_api.c index 9e2b0a5c3..b6e56f013 100644 --- a/client/src/unifyfs_api.c +++ b/client/src/unifyfs_api.c @@ -265,13 +265,13 @@ unifyfs_rc unifyfs_initialize(const char* mountpoint, /* add mount point as a new directory in the file list */ if (unifyfs_fid_from_path(client, mountpoint) < 0) { /* no entry exists for mount point, so create one */ - int fid = unifyfs_fid_create_directory(client, mountpoint); - if (fid < 0) { + rc = unifyfs_fid_create_directory(client, mountpoint); + if (rc != UNIFYFS_SUCCESS) { /* if there was an error, return it */ LOGERR("failed to create directory entry for mount point: `%s'", mountpoint); unifyfs_client_fini(client); - return UNIFYFS_FAILURE; + return rc; } } diff --git a/client/src/unifyfs_fid.c b/client/src/unifyfs_fid.c index c9df554df..9ea448ef2 100644 --- a/client/src/unifyfs_fid.c +++ b/client/src/unifyfs_fid.c @@ 
-553,15 +553,15 @@ int unifyfs_fid_open( } } - /* File should exist at this point, - * update our cache with its metadata. */ + /* Update our cache with its metadata. */ ret = unifyfs_fid_fetch(client, path); if (ret != UNIFYFS_SUCCESS) { - /* Failed to get metadata for a file that should exist. - * Perhaps it was since deleted. We could try to create - * it again and loop through these steps, but for now - * consider this situation to be an error. */ - LOGERR("Failed to get metadata on existing file %s", path); + /* Failed to get metadata. + * Perhaps it was opened without O_CREAT or it + * was since deleted. For the latter, we could + * try to create it again and loop through these + * steps. */ + LOGDBG("Failed to get metadata on file %s", path); return ret; } diff --git a/common/src/Makefile.mk b/common/src/Makefile.mk index e5b53102e..741b319dc 100644 --- a/common/src/Makefile.mk +++ b/common/src/Makefile.mk @@ -6,6 +6,8 @@ UNIFYFS_COMMON_INSTALL_HDRS = \ UNIFYFS_COMMON_BASE_SRCS = \ %reldir%/arraylist.h \ %reldir%/arraylist.c \ + %reldir%/compare_fn.h \ + %reldir%/compare_fn.c \ %reldir%/ini.h \ %reldir%/ini.c \ %reldir%/rm_enumerator.h \ @@ -45,10 +47,10 @@ UNIFYFS_COMMON_BASE_SRCS = \ UNIFYFS_COMMON_BASE_FLAGS = \ -DSYSCONFDIR="$(sysconfdir)" \ - $(MARGO_CFLAGS) + $(MARGO_CFLAGS) $(OPENSSL_CFLAGS) UNIFYFS_COMMON_BASE_LIBS = \ - $(MARGO_LDFLAGS) $(MARGO_LIBS) -lmercury_util \ + $(MARGO_LDFLAGS) $(MARGO_LIBS) $(OPENSSL_LIBS) -lmercury_util \ -lm -lrt -lcrypto -lpthread UNIFYFS_COMMON_OPT_FLAGS = diff --git a/common/src/compare_fn.c b/common/src/compare_fn.c new file mode 100644 index 000000000..dc9f31327 --- /dev/null +++ b/common/src/compare_fn.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * Copyright 2023, UT-Battelle, LLC. + * + * LLNL-CODE-741539 + * All rights reserved. + * + * This is the license for UnifyFS. 
+ * For details, see https://github.com/LLNL/UnifyFS. + * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. + */ + +#include "compare_fn.h" + +int int_compare_fn(const void* a, const void* b) +{ + int ai = *(const int*)a; + int bi = *(const int*)b; + if (ai == bi) { + return 0; + } else if (ai > bi) { + return 1; + } else { + return -1; + } +} + +int uint_compare_fn(const void* a, const void* b) +{ + unsigned int ai = *(const unsigned int*)a; + unsigned int bi = *(const unsigned int*)b; + if (ai == bi) { + return 0; + } else if (ai > bi) { + return 1; + } else { + return -1; + } +} + +int float_compare_fn(const void* a, const void* b) +{ + float af = *(const float*)a; + float bf = *(const float*)b; + if (af == bf) { + return 0; + } else if (af > bf) { + return 1; + } else { + return -1; + } +} + +int double_compare_fn(const void* a, const void* b) +{ + double ad = *(const double*)a; + double bd = *(const double*)b; + if (ad == bd) { + return 0; + } else if (ad > bd) { + return 1; + } else { + return -1; + } +} diff --git a/common/src/compare_fn.h b/common/src/compare_fn.h new file mode 100644 index 000000000..b5a12d8e3 --- /dev/null +++ b/common/src/compare_fn.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * Copyright 2023, UT-Battelle, LLC. + * + * LLNL-CODE-741539 + * All rights reserved. + * + * This is the license for UnifyFS. + * For details, see https://github.com/LLNL/UnifyFS. + * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. 
+ */ + +#ifndef COMPARE_FUNC_H +#define COMPARE_FUNC_H + +typedef int (*compare_fn)(const void *, const void *); + +int int_compare_fn(const void* a, const void* b); +int uint_compare_fn(const void* a, const void* b); + +int float_compare_fn(const void* a, const void* b); +int double_compare_fn(const void* a, const void* b); + +#endif /* COMPARE_FUNC_H */ diff --git a/common/src/rm_enumerator.h b/common/src/rm_enumerator.h index f8bfddf0b..aa5b781dd 100644 --- a/common/src/rm_enumerator.h +++ b/common/src/rm_enumerator.h @@ -33,6 +33,7 @@ ENUMITEM(SLURM, "SchedMD SLURM") \ ENUMITEM(LSF, "IBM Spectrum LSF") \ ENUMITEM(LSF_CSM, "IBM Spectrum LSF with Cluster System Management") \ + ENUMITEM(FLUX, "Flux") \ #ifdef __cplusplus extern "C" { diff --git a/common/src/seg_tree.c b/common/src/seg_tree.c index 7374756dc..73952a4c7 100644 --- a/common/src/seg_tree.c +++ b/common/src/seg_tree.c @@ -38,7 +38,8 @@ #endif static int -compare_func(struct seg_tree_node* node1, struct seg_tree_node* node2) +stn_compare_func(struct seg_tree_node* node1, + struct seg_tree_node* node2) { if (node1->start > node2->end) { return 1; @@ -49,14 +50,14 @@ compare_func(struct seg_tree_node* node1, struct seg_tree_node* node2) } } -RB_PROTOTYPE(inttree, seg_tree_node, entry, compare_func) -RB_GENERATE(inttree, seg_tree_node, entry, compare_func) +RB_PROTOTYPE(inttree, seg_tree_node, entry, stn_compare_func) +RB_GENERATE(inttree, seg_tree_node, entry, stn_compare_func) /* Returns 0 on success, positive non-zero error code otherwise */ int seg_tree_init(struct seg_tree* seg_tree) { memset(seg_tree, 0, sizeof(*seg_tree)); - pthread_rwlock_init(&seg_tree->rwlock, NULL); + ABT_rwlock_create(&(seg_tree->rwlock)); RB_INIT(&seg_tree->head); return 0; @@ -68,6 +69,7 @@ int seg_tree_init(struct seg_tree* seg_tree) void seg_tree_destroy(struct seg_tree* seg_tree) { seg_tree_clear(seg_tree); + ABT_rwlock_free(&(seg_tree->rwlock)); } /* Allocate a node for the range tree. 
Free node with free() when finished */ @@ -523,9 +525,9 @@ seg_tree_iter(struct seg_tree* seg_tree, struct seg_tree_node* start) void seg_tree_rdlock(struct seg_tree* seg_tree) { - int rc = pthread_rwlock_rdlock(&seg_tree->rwlock); + int rc = ABT_rwlock_rdlock(seg_tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_rdlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_rdlock() failed - rc=%d", rc); } } @@ -537,9 +539,9 @@ seg_tree_rdlock(struct seg_tree* seg_tree) void seg_tree_wrlock(struct seg_tree* seg_tree) { - int rc = pthread_rwlock_wrlock(&seg_tree->rwlock); + int rc = ABT_rwlock_wrlock(seg_tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_wrlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_wrlock() failed - rc=%d", rc); } } @@ -551,9 +553,9 @@ seg_tree_wrlock(struct seg_tree* seg_tree) void seg_tree_unlock(struct seg_tree* seg_tree) { - int rc = pthread_rwlock_unlock(&seg_tree->rwlock); + int rc = ABT_rwlock_unlock(seg_tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_unlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_unlock() failed - rc=%d", rc); } } @@ -589,6 +591,7 @@ void seg_tree_clear(struct seg_tree* seg_tree) seg_tree->count = 0; seg_tree->max = 0; + seg_tree_unlock(seg_tree); } diff --git a/common/src/seg_tree.h b/common/src/seg_tree.h index 9c528677f..7ebfacf0e 100644 --- a/common/src/seg_tree.h +++ b/common/src/seg_tree.h @@ -15,7 +15,7 @@ #ifndef __SEG_TREE_H__ #define __SEG_TREE_H__ -#include +#include #include "tree.h" struct seg_tree_node { @@ -28,7 +28,7 @@ struct seg_tree_node { struct seg_tree { RB_HEAD(inttree, seg_tree_node) head; - pthread_rwlock_t rwlock; + ABT_rwlock rwlock; unsigned long count; /* number of segments stored in tree */ unsigned long max; /* maximum logical offset value in the tree */ }; diff --git a/common/src/unifyfs_client_rpcs.h b/common/src/unifyfs_client_rpcs.h index 12702f8ee..b509e892f 100644 --- a/common/src/unifyfs_client_rpcs.h +++ b/common/src/unifyfs_client_rpcs.h @@ -20,6 +20,7 @@ */ #include +#include 
#include #include #include @@ -56,6 +57,14 @@ typedef enum { UNIFYFS_CLIENT_CALLBACK_UNLINK } client_callback_e; +typedef struct { + client_rpc_e req_type; + hg_handle_t handle; + void* input; + void* bulk_buf; + size_t bulk_sz; +} client_rpc_req_t; + /* unifyfs_attach_rpc (client => server) * * initialize server access to client's shared memory and file state */ diff --git a/common/src/unifyfs_meta.c b/common/src/unifyfs_meta.c index 10a3bd735..f3bfbec51 100644 --- a/common/src/unifyfs_meta.c +++ b/common/src/unifyfs_meta.c @@ -12,9 +12,10 @@ * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. */ +#include #include +#include #include -#include #include "unifyfs_meta.h" @@ -39,9 +40,13 @@ uint64_t compute_path_md5(const char* path) { unsigned long len; unsigned char digested[16] = {0}; + unsigned int digestSize; + /* digestSize is set by EVP_Digest(). For MD5 digests, it should always + * be 16. */ len = strlen(path); - MD5((const unsigned char*) path, len, digested); + EVP_Digest(path, len, digested, &digestSize, EVP_md5(), NULL); + assert(digestSize == 16); /* construct uint64_t hash from first 8 digest bytes */ uint64_t* digest_value = (uint64_t*) digested; diff --git a/common/src/unifyfs_meta.h b/common/src/unifyfs_meta.h index 0c1d35116..6a78f2004 100644 --- a/common/src/unifyfs_meta.h +++ b/common/src/unifyfs_meta.h @@ -36,6 +36,8 @@ extern "C" { # define UNIFYFS_METADATA_CACHE_SECONDS 5 #endif +/* a valid gfid generated via MD5 hash will never be zero */ +#define INVALID_GFID (0) /* extent slice size used for metadata */ extern size_t meta_slice_sz; @@ -109,6 +111,9 @@ typedef struct { struct timespec atime; struct timespec mtime; struct timespec ctime; + + /* metadata caching timestamp */ + time_t last_update; } unifyfs_file_attr_t; enum { @@ -123,7 +128,7 @@ void unifyfs_file_attr_set_invalid(unifyfs_file_attr_t* attr) { memset(attr, 0, sizeof(*attr)); attr->filename = NULL; - attr->gfid = -1; + attr->gfid = INVALID_GFID; 
attr->is_laminated = -1; attr->is_shared = -1; attr->mode = (uint32_t) -1; @@ -173,10 +178,18 @@ int unifyfs_file_attr_update(int attr_op, { if (!dst || !src || (attr_op == UNIFYFS_FILE_ATTR_OP_INVALID) - || (dst->gfid != src->gfid)) { + || (src->gfid == INVALID_GFID)) { return EINVAL; } + if (attr_op == UNIFYFS_FILE_ATTR_OP_CREATE) { + dst->gfid = src->gfid; + } + + struct timespec tp = {0}; + clock_gettime(CLOCK_REALTIME, &tp); + dst->last_update = tp.tv_sec; + LOGDBG("updating attributes for gfid=%d", dst->gfid); /* Update fields only with valid values and associated operation. @@ -263,40 +276,43 @@ int unifyfs_file_attr_update(int attr_op, return 0; } -static inline -void unifyfs_file_attr_to_stat(unifyfs_file_attr_t* fattr, struct stat* sb) -{ - if (fattr && sb) { - debug_print_file_attr(fattr); - - sb->st_dev = UNIFYFS_STAT_DEFAULT_DEV; - sb->st_ino = fattr->gfid; - sb->st_mode = fattr->mode; - sb->st_uid = fattr->uid; - sb->st_gid = fattr->gid; - sb->st_rdev = UNIFYFS_STAT_DEFAULT_DEV; - sb->st_size = fattr->size; - - /* TODO: use cfg.logio_chunk_size here for st_blksize - * and report actual chunks allocated for st_blocks */ - sb->st_blksize = UNIFYFS_STAT_DEFAULT_BLKSIZE; - sb->st_blocks = fattr->size / UNIFYFS_STAT_DEFAULT_BLKSIZE; - if (fattr->size % UNIFYFS_STAT_DEFAULT_BLKSIZE > 0) { - sb->st_blocks += 1; - } - - /* - * Re-purpose st_nlink to tell us if the file is laminated or not. - * That way, if we do eventually make /unifyfs mountable, we can easily - * see with 'ls -l' or stat if the file is laminated or not. - */ - sb->st_nlink = fattr->is_laminated ? 
1 : 0; - - sb->st_atim = fattr->atime; - sb->st_mtim = fattr->mtime; - sb->st_ctim = fattr->ctime; - } -} +/* + * Convert UnifyFS file attr to struct stat/stat64 + * + * fattr is type of unifyfs_file_attr_t* + * sb is type of struct stat* or struct stat64* + */ +#define unifyfs_file_attr_to_stat(fattr, sb) \ +do { \ + unifyfs_file_attr_t* _fattr = (fattr); \ + if (_fattr && (NULL != (void*)(sb))) { \ + debug_print_file_attr(_fattr); \ + (sb)->st_dev = UNIFYFS_STAT_DEFAULT_DEV; \ + (sb)->st_ino = _fattr->gfid; \ + (sb)->st_mode = _fattr->mode; \ + (sb)->st_uid = _fattr->uid; \ + (sb)->st_gid = _fattr->gid; \ + (sb)->st_rdev = UNIFYFS_STAT_DEFAULT_DEV; \ + (sb)->st_size = _fattr->size; \ + \ + /* TODO: use cfg.logio_chunk_size here for st_blksize */ \ + /* and report actual chunks allocated for st_blocks */ \ + (sb)->st_blksize = UNIFYFS_STAT_DEFAULT_BLKSIZE; \ + (sb)->st_blocks = _fattr->size / UNIFYFS_STAT_DEFAULT_BLKSIZE; \ + if (_fattr->size % UNIFYFS_STAT_DEFAULT_BLKSIZE > 0) { \ + (sb)->st_blocks += 1; \ + } \ + /* Re-purpose st_nlink to tell us if the file is laminated or not. */ \ + /* That way, if we do eventually make /unifyfs mountable, we can */ \ + /* easily see with 'ls -l' or stat if the file is laminated or not.*/ \ + (sb)->st_nlink = _fattr->is_laminated ? 1 : 0; \ + \ + (sb)->st_atim = _fattr->atime; \ + (sb)->st_mtim = _fattr->mtime; \ + (sb)->st_ctim = _fattr->ctime; \ + } \ +} while (0) + /* given an input mode, mask it with umask and return. 
* set perms=0 to request all read/write bits */ @@ -333,16 +349,6 @@ int compare_name_rank_pair(const void* a, const void* b) return cmp; } -/* qsort comparison function for int */ -static inline -int compare_int(const void* a, const void* b) -{ - int aval = *(const int*)a; - int bval = *(const int*)b; - return aval - bval; -} - - /* * Hash a file path to a uint64_t using MD5 * @param path absolute file path @@ -351,7 +357,7 @@ int compare_int(const void* a, const void* b) uint64_t compute_path_md5(const char* path); /* - * Hash a file path to an integer gfid + * Hash a file path to a positive integer gfid * @param path absolute file path * @return gfid */ @@ -362,12 +368,8 @@ int unifyfs_generate_gfid(const char* path) uint64_t hash64 = compute_path_md5(path); uint32_t hash32 = (uint32_t)(hash64 >> 32); - /* TODO: Remove next statement once we get rid of MDHIM. - * - * MDHIM requires positive values for integer keys, due to the way - * slice servers are calculated. We use an integer key for the - * gfid -> file attributes index. To guarantee a positive value, we - * shift right one bit to make sure the top bit is zero. */ + /* To guarantee a positive value, we shift right one bit + * to make sure the top bit is zero. 
*/ hash32 = hash32 >> 1; return (int)hash32; diff --git a/common/src/unifyfs_rc.h b/common/src/unifyfs_rc.h index 627108cc5..000f25d29 100644 --- a/common/src/unifyfs_rc.h +++ b/common/src/unifyfs_rc.h @@ -43,8 +43,6 @@ ENUMITEM(GOTCHA, "Gotcha operation error") \ ENUMITEM(KEYVAL, "Key-value store operation error") \ ENUMITEM(MARGO, "Mercury/Argobots operation error") \ - ENUMITEM(MDHIM, "MDHIM operation error") \ - ENUMITEM(META, "Metadata store operation error") \ ENUMITEM(NYI, "Not yet implemented") \ ENUMITEM(PMI, "PMI2/PMIx error") \ ENUMITEM(SHMEM, "Shared memory region init/access error") \ diff --git a/common/src/unifyfs_server_rpcs.h b/common/src/unifyfs_server_rpcs.h index 69e71ab86..f4cd5d122 100644 --- a/common/src/unifyfs_server_rpcs.h +++ b/common/src/unifyfs_server_rpcs.h @@ -20,6 +20,7 @@ */ #include +#include #include #include #include @@ -41,15 +42,17 @@ typedef enum { UNIFYFS_SERVER_RPC_LAMINATE, UNIFYFS_SERVER_RPC_METAGET, UNIFYFS_SERVER_RPC_METASET, - UNIFYFS_SERVER_RPC_PID_REPORT, UNIFYFS_SERVER_RPC_TRANSFER, UNIFYFS_SERVER_RPC_TRUNCATE, + UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP, UNIFYFS_SERVER_BCAST_RPC_EXTENTS, UNIFYFS_SERVER_BCAST_RPC_FILEATTR, UNIFYFS_SERVER_BCAST_RPC_LAMINATE, + UNIFYFS_SERVER_BCAST_RPC_METAGET, UNIFYFS_SERVER_BCAST_RPC_TRANSFER, UNIFYFS_SERVER_BCAST_RPC_TRUNCATE, - UNIFYFS_SERVER_BCAST_RPC_UNLINK + UNIFYFS_SERVER_BCAST_RPC_UNLINK, + UNIFYFS_SERVER_PENDING_SYNC } server_rpc_e; /* structure to track server-to-server rpc request state */ @@ -183,6 +186,13 @@ MERCURY_GEN_PROC(bcast_progress_out_t, ((int32_t)(ret))) DECLARE_MARGO_RPC_HANDLER(bcast_progress_rpc) +/* Broadcast 'bootstrap complete' to all servers */ +MERCURY_GEN_PROC(bootstrap_complete_bcast_in_t, + ((int32_t)(root))) +MERCURY_GEN_PROC(bootstrap_complete_bcast_out_t, + ((int32_t)(ret))) +DECLARE_MARGO_RPC_HANDLER(bootstrap_complete_bcast_rpc) + /* Broadcast file extents to all servers */ MERCURY_GEN_PROC(extent_bcast_in_t, ((int32_t)(root)) @@ -241,6 +251,23 @@ 
MERCURY_GEN_PROC(unlink_bcast_out_t, ((int32_t)(ret))) DECLARE_MARGO_RPC_HANDLER(unlink_bcast_rpc) +/* Broadcast request for metadata to all servers */ +/* Sends a request to all servers to reply with the metadata for + * all files that they own. */ +MERCURY_GEN_PROC(metaget_all_bcast_in_t, + ((int32_t)(root))) +MERCURY_GEN_PROC(metaget_all_bcast_out_t, + ((int32_t)(num_files)) + ((hg_bulk_t)(file_meta)) + ((hg_string_t)(filenames)) + ((int32_t)(ret))) +/* file_meta will be an array of unifyfs_file_attr_t structs. Since + * these structs store the filename in separately allocated memory, we'll + * have to send all the filenames separately from the array of structs. + * That's what filenames is for: we'll concatenate all the filenames into + * a single hg_string_t, send that and then recreate correct + * unifyfs_file_attr_t structs at the receiving end. */ +DECLARE_MARGO_RPC_HANDLER(metaget_all_bcast_rpc) #ifdef __cplusplus } // extern "C" diff --git a/configure.ac b/configure.ac index 9c09bd368..fe392a5e1 100755 --- a/configure.ac +++ b/configure.ac @@ -105,15 +105,6 @@ AS_IF([test "x$enable_pmi" = "xyes"],[ AM_CONDITIONAL([USE_PMI2], [false]) ]) -# MDHIM support build option -AC_ARG_ENABLE([mdhim],[AS_HELP_STRING([--enable-mdhim],[Enable MDHIM build options.])]) -AS_IF([test "x$enable_mdhim" = "xyes"],[ - AM_CONDITIONAL([USE_MDHIM], [true]) - UNIFYFS_AC_LEVELDB -],[ - AM_CONDITIONAL([USE_MDHIM], [false]) -]) - AC_ARG_WITH(pkgconfigdir, [AS_HELP_STRING([--with-pkgconfigdir=DIR],[pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@])], [pkgconfigdir=$withval], @@ -256,7 +247,14 @@ LINK_WRAPPERS+=",-wrap,munmap" # status functions LINK_WRAPPERS+=",-wrap,fstat" +AC_CHECK_FUNCS(fstat64,[ + LINK_WRAPPERS+=",-wrap,fstat64" +],[]) LINK_WRAPPERS+=",-wrap,stat" +AC_CHECK_FUNCS(stat64,[ + LINK_WRAPPERS+=",-wrap,stat64" +],[]) + AC_CHECK_FUNCS(statfs,[ LINK_WRAPPERS+=",-wrap,statfs" ],[]) @@ -267,12 +265,21 @@ AC_CHECK_FUNCS(fstatfs,[ AC_CHECK_FUNCS(__lxstat,[ 
LINK_WRAPPERS+=",-wrap,__lxstat" ],[]) +AC_CHECK_FUNCS(__lxstat64,[ + LINK_WRAPPERS+=",-wrap,__lxstat64" +],[]) AC_CHECK_FUNCS(__xstat,[ LINK_WRAPPERS+=",-wrap,__xstat" ],[]) +AC_CHECK_FUNCS(__xstat64,[ + LINK_WRAPPERS+=",-wrap,__xstat64" +],[]) AC_CHECK_FUNCS(__fxstat,[ LINK_WRAPPERS+=",-wrap,__fxstat" ],[]) +AC_CHECK_FUNCS(__fxstat64,[ + LINK_WRAPPERS+=",-wrap,__fxstat64" +],[]) AC_CHECK_FUNCS(posix_fadvise, [ LINK_WRAPPERS+=",-wrap,posix_fadvise" @@ -292,6 +299,7 @@ LINK_WRAPPERS+=",-wrap,rmdir" LINK_WRAPPERS+=",-wrap,fclose" LINK_WRAPPERS+=",-wrap,fflush" LINK_WRAPPERS+=",-wrap,fopen" +LINK_WRAPPERS+=",-wrap,fopen64" LINK_WRAPPERS+=",-wrap,freopen" LINK_WRAPPERS+=",-wrap,setbuf" LINK_WRAPPERS+=",-wrap,setvbuf" @@ -369,8 +377,6 @@ AC_SUBST(unifyfs_bin_path) AC_SUBST(LINK_WRAPPERS) AC_CONFIG_FILES([Makefile - meta/Makefile - meta/src/Makefile server/Makefile server/src/Makefile client/Makefile diff --git a/docs/assumptions.rst b/docs/assumptions.rst index f21b5fd2b..53440d478 100644 --- a/docs/assumptions.rst +++ b/docs/assumptions.rst @@ -116,7 +116,7 @@ Commit Consistency Semantics in UnifyFS ``fflush()``, ``close()``, or ``fsync()`` in the application source code, or by supplying the ``client.write_sync`` configuration parameter to UnifyFS on startup, which will cause an implicit `flush` - operation after every `write`` (note: use of the + operation after every `write` (note: use of the ``client.write_sync`` mode can significantly slow down write performance). In this case, inter-process synchronization is still required for applications that perform conflicting updates to files. diff --git a/docs/build.rst b/docs/build.rst index c269c02c8..b6f788bb2 100644 --- a/docs/build.rst +++ b/docs/build.rst @@ -143,7 +143,7 @@ configure and build UnifyFS from its source code directory. 
$ gotcha_install=$(spack location -i gotcha) $ spath_install=$(spack location -i spath) $ - $ ./autogen.sh + $ ./autogen.sh # skip if using release tarball $ ./configure --prefix=/path/to/install --with-gotcha=${gotcha_install} --with-spath=${spath_install} $ make $ make install @@ -200,7 +200,7 @@ As an example, the commands may look like: $ export PKG_CONFIG_PATH=$INSTALL_DIR/lib/pkgconfig:$INSTALL_DIR/lib64/pkgconfig:$PKG_CONFIG_PATH $ export LD_LIBRARY_PATH=$INSTALL_DIR/lib:$INSTALL_DIR/lib64:$LD_LIBRARY_PATH - $ ./autogen.sh + $ ./autogen.sh # skip if using release tarball $ ./configure --prefix=/path/to/install CPPFLAGS=-I/path/to/install/include LDFLAGS="-L/path/to/install/lib -L/path/to/install/lib64" $ make $ make install @@ -223,6 +223,7 @@ after ``./autogen.sh`` has been run. ---------- .. _configure-options-label: + ----------------- Configure Options ----------------- diff --git a/docs/conf.py b/docs/conf.py index e3b405230..c104bcdf0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,7 +28,7 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -#extensions = [] +extensions = ['sphinx.ext.todo'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -54,16 +54,16 @@ # built documents. # # The short X.Y version. -version = u'1.1' +version = u'2.0' # The full version, including alpha/beta/rc tags. -release = u'1.1' +release = u'2.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
-language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -139,7 +139,7 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -def setup(app): app.add_stylesheet("theme_overrides.css" ) +html_css_files = ['theme_overrides.css'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/docs/dependencies.rst b/docs/dependencies.rst index 238b33183..7c6524cd3 100644 --- a/docs/dependencies.rst +++ b/docs/dependencies.rst @@ -55,11 +55,9 @@ UnifyFS specific error codes are defined as follows: 1002 GOTCHA Gotcha operation error 1003 KEYVAL Key-value store operation error 1004 MARGO Mercury/Argobots operation error - 1005 MDHIM MDHIM operation error - 1006 META Metadata store operation error - 1007 NYI Not yet implemented - 1008 PMI PMI2/PMIx error - 1009 SHMEM Shared memory region init/access error - 1010 THREAD POSIX thread operation failed - 1011 TIMEOUT Timed out + 1005 NYI Not yet implemented + 1006 PMI PMI2/PMIx error + 1007 SHMEM Shared memory region init/access error + 1008 THREAD POSIX thread operation failed + 1009 TIMEOUT Operation timed out ===== ========= ====================================== diff --git a/docs/images/UnifyFS-tutorial-May2022.png b/docs/images/UnifyFS-tutorial-May2022.png new file mode 100644 index 000000000..44b5abc65 Binary files /dev/null and b/docs/images/UnifyFS-tutorial-May2022.png differ diff --git a/docs/limitations.rst b/docs/limitations.rst index 424470c39..b30121791 100644 --- a/docs/limitations.rst +++ b/docs/limitations.rst @@ -340,22 +340,22 @@ since it may induce more file flush operations than necessary. PnetCDF Limitations ------------------- PnetCDF applications can utilize UnifyFS, -and in fact, the semantics of the `PnetCDF API`_ align well with UnifyFS constraints. 
- -PnetCDF parallelizes access to NetCDF files using MPI. -An MPI communicator is passed as an argument when opening a file. -Any collective call in PnetCDF is global across the process group -associated with the communicator used to open the file. +and the semantics of the `PnetCDF API`_ align well with UnifyFS constraints. +PnetCDF uses MPI-IO to read and write files. In addition to any restrictions required when using UnifyFS with PnetCDF, -one should also follow any recommendations regarding UnifyFS and the +one must follow any recommendations regarding UnifyFS and the underlying MPI-IO implementation. Data Consistency **************** -PnetCDF uses MPI-IO to read and write files, -and PnetCDF follows the data consistency model defined by MPI-IO. +PnetCDF parallelizes access to NetCDF files using MPI. +An MPI communicator is passed as an argument when opening a file. +Any collective call in PnetCDF is global across the process group +associated with the communicator used to open the file. + +PnetCDF follows the data consistency model defined by MPI-IO. Specifically, from its documentation about `PnetCDF data consistency`_: .. Note:: @@ -371,7 +371,7 @@ Specifically, from its documentation about `PnetCDF data consistency`_: users can explicitly call ``ncmpi_sync()``. In ``ncmpi_sync()``, ``MPI_File_sync()`` and ``MPI_Barrier()`` are called. -Upon inspection of the PnetCDF implementation, +Upon inspection of the implementation of the PnetCDF v1.12.3 release, the following PnetCDF functions include the following calls:: ncmpio_file_sync @@ -401,11 +401,30 @@ the following PnetCDF functions include the following calls:: If a program must read data written by another process, PnetCDF users must do one of the following when using UnifyFS: -1) Set ``UNIFYFS_CLIENT_WRITE_SYNC=1``, in which case each POSIX +1) Add explicit calls to ``ncmpi_sync()`` after writing and before reading. 
+2) Set ``UNIFYFS_CLIENT_WRITE_SYNC=1``, in which case each POSIX write operation invokes a flush. -2) Use ``NC_SHARE`` when opening files so that the PnetCDF library invokes +3) Use ``NC_SHARE`` when opening files so that the PnetCDF library invokes ``MPI_File_sync()`` and ``MPI_Barrier()`` calls after its MPI-IO operations. -3) Add explicit calls to ``ncmpi_sync()`` after writing and before reading. + +Of these options, +it is recommended that one add ``ncmpi_sync()`` calls where necessary. +Setting ``UNIFYFS_CLIENT_WRITE_SYNC=1`` is convenient since one does not +need to change the application, but it may have a larger impact on performance. +Opening or creating a file with ``NC_SHARE`` may work for some applications, +but it depends on whether the PnetCDF implementation +internally calls ``MPI_File_sync()`` at all appropriate places, +which is not guaranteed. + +A number of PnetCDF calls invoke write operations on the underlying file. +In addition to the ``ncmpi_put_*`` collection of calls +that write data to variables or attributes, +``ncmpi_enddef`` updates variable definitions, +and it can fill variables with default values. +Users may also explicitly fill variables by calling ``ncmpi_fill_var_rec()``. +One must ensure necessary ``ncmpi_sync()`` calls are placed between +any fill and write operations in case +they happen to write to overlapping regions of a file. Note that ``ncmpi_sync()`` calls ``MPI_File_sync()`` and ``MPI_Barrier()``, but it does not call ``MPI_File_sync()`` again after calling ``MPI_Barrier()``. @@ -427,4 +446,4 @@ as noted above. .. _VerifyIO README: https://github.com/uiuc-hpc/Recorder/tree/pilgrim/tools/verifyio#note-on-the-third-step .. _ROMIO hints file: https://wordpress.cels.anl.gov/romio/2008/09/26/system-hints-hints-via-config-file .. _PnetCDF API: https://parallel-netcdf.github.io/wiki/pnetcdf-api.pdf -.. _PnetCDF data consistency: https://github.com/Parallel-NetCDF/PnetCDF/blob/master/doc/README.consistency +.. 
_PnetCDF data consistency: https://github.com/Parallel-NetCDF/PnetCDF/blob/e47596438326bfa7b9ed0b3857800d3a0d09ff1a/doc/README.consistency.md diff --git a/docs/link.rst b/docs/link.rst index 8b29429de..4cc2c6bf9 100644 --- a/docs/link.rst +++ b/docs/link.rst @@ -86,3 +86,16 @@ Note the use of the ``-I`` option to specify the location of the library provides the Fortran bindings for the ``unifyfs_mount`` and ``unifyfs_unmount`` functions. +---------- + +---------- +LD_PRELOAD +---------- + +In the case where an application doesn't need to be rebuilt in order to use +UnifyFS (e.g., file paths are set with arguments/configs), ``LD_PRELOAD`` can be +used at runtime to allow for UnifyFS interception of file I/O. + +.. code-block:: Bash + + $ srun -N2 -n8 --export=ALL,LD_PRELOAD=$UNIFYFS_LIB/libunifyfs_mpi_gotcha.so myApplication diff --git a/docs/overview.rst b/docs/overview.rst index 8f1c549e1..dae6a9a91 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -1,6 +1,6 @@ -================ +======== Overview -================ +======== UnifyFS is a user-level file system under active development that supports shared file I/O over distributed storage on HPC systems, @@ -25,10 +25,11 @@ Users must copy files that need to be persisted beyond the lifetime of the job from UnifyFS to a permanent file system. UnifyFS provides an API and a utility to perform these copies. +----- ---------------------------- +----------------- High Level Design ---------------------------- +----------------- .. image:: images/design-high-lvl.png @@ -52,3 +53,67 @@ the job allocation. The UnifyFS server handles the I/O requests from the UnifyFS library. The UnifyFS server uses ECP `Mochi `_ to communicate with user application processes and server processes on other nodes. 
+ +----- + +---------------- +UnifyFS Citation +---------------- + +We recommend that you use this as the primary citation for UnifyFS as well as a +reference for further details on the UnifyFS architecture and semantics: + + Michael Brim, Adam Moody, Seung-Hwan Lim, Ross Miller, Swen Boehm, Cameron Stanavige, Kathryn Mohror, Sarp Oral, “UnifyFS: A User-level Shared File System for Unified Access to Distributed Local Storage,” 37th IEEE International Parallel & Distributed Processing Symposium (IPDPS 2023), St. Petersburg, FL, May 2023. + +----- + +-------------- +UnifyFS Videos +-------------- + +Quickstart +********** + +Quick overview on what UnifyFS is and how to use it. + +.. raw:: html + +
+ +
+ +----- + +ECP Tutorial +************ + +More in-depth recording of the problems UnifyFS solves and a how-to using a +pre-1.0 beta version. + +.. raw:: html + +
+ +
+ +.. rubric:: UnifyFS Tutorial Slides - ECP 2022 + +.. image:: images/UnifyFS-tutorial-May2022.png + :target: slides/UnifyFS-tutorial-May2022.pdf + :height: 72px + :align: left + :alt: UnifyFS Tutorial Slides - ECP 2022 + +:download:`Download PDF `. + +| + +.. rubric:: Changes since above ECP 2022 Tutorial: + +- Video 30:04 | Slide 19 - Variants ``boostsys`` and ``preload`` have since been + added. +- Video 31:08 | Slide 22 - OpenSSL is also a dependency that was missed on this slide. +- Video 41:10 | Slide 48 - User Guide url starts with *https*, not *http*. + +For full changes since the pre-1.0 beta version of UnifyFS used in the May 2022 +tutorial, see the `UnifyFS release notes `_. diff --git a/docs/requirements.in b/docs/requirements.in new file mode 100644 index 000000000..82133027c --- /dev/null +++ b/docs/requirements.in @@ -0,0 +1,2 @@ +sphinx +sphinx_rtd_theme diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..db315dd29 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,65 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile --output-file=docs/requirements.txt --strip-extras docs/requirements.in +# +alabaster==0.7.13 + # via sphinx +babel==2.14.0 + # via sphinx +certifi==2023.11.17 + # via requests +charset-normalizer==3.3.2 + # via requests +docutils==0.20.1 + # via + # sphinx + # sphinx-rtd-theme +idna==3.6 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.2 + # via sphinx +markupsafe==2.1.3 + # via jinja2 +packaging==23.2 + # via sphinx +pygments==2.17.2 + # via sphinx +requests==2.31.0 + # via sphinx +snowballstemmer==2.2.0 + # via sphinx +sphinx==7.2.6 + # via + # -r docs/requirements.in + # sphinx-rtd-theme + # sphinxcontrib-applehelp + # sphinxcontrib-devhelp + # sphinxcontrib-htmlhelp + # sphinxcontrib-jquery + # sphinxcontrib-qthelp + # sphinxcontrib-serializinghtml +sphinx-rtd-theme==2.0.0 + # via -r docs/requirements.in 
+sphinxcontrib-applehelp==1.0.7 + # via sphinx +sphinxcontrib-devhelp==1.0.5 + # via sphinx +sphinxcontrib-htmlhelp==2.0.4 + # via sphinx +sphinxcontrib-jquery==4.1 + # via sphinx-rtd-theme +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.6 + # via sphinx +sphinxcontrib-serializinghtml==1.1.9 + # via sphinx +urllib3==2.1.0 + # via requests + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/docs/slides/UnifyFS-tutorial-May2022.pdf b/docs/slides/UnifyFS-tutorial-May2022.pdf new file mode 100644 index 000000000..552313a94 Binary files /dev/null and b/docs/slides/UnifyFS-tutorial-May2022.pdf differ diff --git a/docs/wrappers.rst b/docs/wrappers.rst index a06695791..e5598ee5a 100644 --- a/docs/wrappers.rst +++ b/docs/wrappers.rst @@ -100,7 +100,7 @@ unifyfs_list.txt The unifyfs_list.txt_ file specifies the set of wrappers in UnifyFS. Most but not all such wrappers are supported. The command to build unifyfs list: -.. code-block:: +.. code-block:: Bash grep UNIFYFS_WRAP ../src/\*.c > unifyfs_list.txt diff --git a/m4/ax_lib_hdf5.m4 b/m4/ax_lib_hdf5.m4 index 5a50442af..d09f70660 100644 --- a/m4/ax_lib_hdf5.m4 +++ b/m4/ax_lib_hdf5.m4 @@ -188,9 +188,9 @@ HDF5 support is being disabled (equivalent to --with-hdf5=no). 
HDF5_SHOW=$(eval $H5CC -show) dnl Get the actual compiler used - HDF5_CC=$(eval $H5CC -show | $AWK '{print $[]1}') + HDF5_CC=$(eval $H5CC -show | $AWK 'NR==1{print $[]1}') if test "$HDF5_CC" = "ccache"; then - HDF5_CC=$(eval $H5CC -show | $AWK '{print $[]2}') + HDF5_CC=$(eval $H5CC -show | $AWK 'NR==1{print $[]2}') fi dnl h5cc provides both AM_ and non-AM_ options diff --git a/m4/openssl.m4 b/m4/openssl.m4 index c40ac5cd2..d3bc20296 100644 --- a/m4/openssl.m4 +++ b/m4/openssl.m4 @@ -13,6 +13,20 @@ AC_DEFUN([UNIFYFS_AC_OPENSSL], [ couldn't find a suitable openssl-devel ]))]) + + + + AC_CHECK_LIB([crypto], [EVP_Digest], + [ + AM_CONDITIONAL([HAVE_OPENSSL_EVP], [true]) + ], + [ + AC_MSG_ERROR([couldn't find a sufficiently new OpenSSL installation]) + ], + [] + ) + + # restore flags CFLAGS=$OPENSSL_OLD_CFLAGS CXXFLAGS=$OPENSSL_OLD_CXXFLAGS diff --git a/meta/.gitignore b/meta/.gitignore deleted file mode 100644 index 527416fcb..000000000 --- a/meta/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Compiled Dynamic libraries -*.so -*.so* -*.dylib -*.dll - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app - -# Makefile and Makefile config -#Makefile -#Makefile.cfg diff --git a/meta/Makefile.am b/meta/Makefile.am deleted file mode 100644 index af437a64d..000000000 --- a/meta/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -SUBDIRS = src diff --git a/meta/README.md b/meta/README.md deleted file mode 100644 index e3bdddebf..000000000 --- a/meta/README.md +++ /dev/null @@ -1,60 +0,0 @@ -MDHIM - Multi-Dimensional Hashing Indexing Middleware - -Description ---------------- -This version of MDHIM package is customized to support UnifyFS. -In order to test these extended interfaces and implementations, -two test files are provided in the directory tests/singletests -(range_bget.c and range_test.c). The test scripts for these two -files are range_bget.sh range_test.sh. 
- -MDHIM is a parallel key/value store framework written in MPI. -Unlike other big data solutions, MDHIM has been designed for an HPC -environment and to take advantage of high speed networks. - -Building the Tests ---------------- -1. cd tests -2. Type: make -3. If all went well, you have all the tests compiled - -mdhimtst (mdhimtst.c) ---------------- -Typical setup to run file: ./mdhimtst -ftest.txt -q -t5 -d3 -r0~2 -w1 -p ./ - -Typical batch file setup: - -put single_insert.txt -get single_insertget.txt -del single_insertdel.txt - -Batch command file setup: - -For put get del bput bget bdel -[command] [#of items in file(if necessary)] [file containg key and value (if necessary)] - -For nput ngetn (random numbers are generated for this function) -[#number of records to put/get] [key size (only applicapble for byte/strng)] -[value size (nput only)] [size correctance number] - -For flush (only command is used to flush) - -Parameters: - -f (file with batch commands) - -d (Type of DB to use: levelDB=1 mysql=3) - -t (Type of keys: int=1, longInt=2, float=3, double=4, longDouble=5, - string=6, byte=7) - -p (path where DB will be created) - -n (Name of DataBase file or directory) - -b (MLOG_CRIT=1, MLOG_DBG=2) - -a (DB store append mode. By default records with same key are overwritten. - This flag turns on the option to append to existing values. - -w This flag turns on the option to either allow or deny threads to - do command based on if it is dividiable by the modlus of the modulus number - -r ~ This flag turns on the option to either - allow or deny threads to do command based on if the rank falls inclusively inbetween - the rank ranges. NOTE: You must use the '~' inbetween the numbers. 
Example: -r0~2 - -q<0|1> (Quiet mode, default is verbose) 1=write out to log file - - - diff --git a/meta/src/Makefile.am b/meta/src/Makefile.am deleted file mode 100644 index dd094febe..000000000 --- a/meta/src/Makefile.am +++ /dev/null @@ -1,43 +0,0 @@ -if USE_MDHIM -noinst_LIBRARIES = libmdhim.a -endif - -libmdhim_a_SOURCES = Mlog2/mlog2.c \ - Mlog2/mlog2.h \ - Mlog2/mlogfacs2.h \ - client.c \ - client.h \ - local_client.c \ - local_client.h \ - data_store.c \ - data_store.h \ - partitioner.c \ - partitioner.h \ - messages.c \ - messages.h \ - range_server.c \ - range_server.h \ - ds_leveldb.c \ - ds_leveldb.h \ - mdhim_options.c \ - mdhim_options.h \ - mdhim_private.c \ - mdhim_private.h \ - indexes.c \ - indexes.h \ - mdhim.c \ - mdhim.h \ - uthash/uthash.h - -# target_LDFLAGS is not valid for library archive targets -#libmdhim_a_LDFLAGS = $(AM_LDFLAGS) $(LEVELDB_LDFLAGS) $(LEVELDB_LIBS) $(MPI_CLDFLAGS) - -AM_CPPFLAGS = -I$(top_srcdir)/meta/src/Mlog2 \ - -I$(top_srcdir)/meta/src/uthash \ - -I$(top_srcdir)/common/src \ - -I$(top_srcdir)/server/src - -AM_CFLAGS = -DLEVELDB_SUPPORT $(LEVELDB_CFLAGS) $(MPI_CFLAGS) $(MARGO_CFLAGS) -AM_CFLAGS += -Wall -Werror - -CLEANFILES = diff --git a/meta/src/Mlog2/makefacs.pl b/meta/src/Mlog2/makefacs.pl deleted file mode 100644 index 594a447ea..000000000 --- a/meta/src/Mlog2/makefacs.pl +++ /dev/null @@ -1,198 +0,0 @@ -#!/usr/bin/env perl -# -# The Self-* Storage System Project -# Copyright (c) 2004-2011, Carnegie Mellon University. -# All rights reserved. -# http://www.pdl.cmu.edu/ (Parallel Data Lab at Carnegie Mellon) -# -# This software is being provided by the copyright holders under the -# following license. 
By obtaining, using and/or copying this software, -# you agree that you have read, understood, and will comply with the -# following terms and conditions: -# -# Permission to reproduce, use, and prepare derivative works of this -# software is granted provided the copyright and "No Warranty" statements -# are included with all reproductions and derivative works and associated -# documentation. This software may also be redistributed without charge -# provided that the copyright and "No Warranty" statements are included -# in all redistributions. -# -# NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS. -# CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER -# EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED -# TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY -# OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE -# MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT -# TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. -# COPYRIGHT HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE -# OR DOCUMENTATION. -# - -# makefacs.pl autogen mlogfacs.h file based on facs list - -@facs = ( -# names: -# abbreviated long - "MDHIM_CLIENT" => "MDHIM client", - "MDHIM_SERVER" => "MDHIM range server", - "STORE" => "storage", - "MPI" => "MPI" -); - -@mloglvls = ( -# format: main_name:alias1:alias2:... 
- "MLOG_EMERG", # emergency - "MLOG_ALERT", # alert - "MLOG_CRIT", # critical - "MLOG_ERR", # error - "MLOG_WARN", # warning - "MLOG_NOTE", # notice - "MLOG_INFO", # info - "MLOG_DBG", # all debug streams - "MLOG_DBG0:DAPI", # debug stream 0 - "MLOG_DBG1:DINTAPI", # debug stream 1 - "MLOG_DBG2:DCOMMON", # debug stream 2 - "MLOG_DBG3:DRARE", # debug stream 3 -); - -###################################################################### -# end of configuration section -###################################################################### - -die "odd sized facs[] array" if (($#facs+1) & 1); - -open(P, ">mlogfacs.h_NEW") || die "cannot open mlogfacs.h_NEW ($!)"; - -use File::Basename; -my $myname = basename($0); - -$_ = < -#include -#include -#include -#include -#include -#include -#include -#include /* for strncasecmp */ -#include -#include -#include - -#ifdef MLOG_MUTEX -#include -#endif - -#include -#include -#include -#include - -#include -#include -#include - -#include "mlog2.h" - - -/* - * dispose of the mlog() macro function, if it is defined. we need - * the real thing here... - */ -#ifdef mlog -#undef mlog -#endif /* mlog */ - -#define MLOG_TAGPAD 16 /* extra tag bytes to alloc for a pid */ - -/** - * message buffer header: lives at the start of a message buffer, is - * malloced with it, and contains pointers + metainfo. 
- */ -struct mlog_mbhead { -#define MBH_START ">CpMdUl<" - char mbh_start[8]; /*!< magic string that marks start of msgbuf */ - uint32_t mbh_beef; /*!< 0xdeadbeef, for checking byte order */ - uint32_t mbh_len; /*!< length of buffer (not including header) */ - uint32_t mbh_cnt; /*!< number of bytes currently in buffer */ - uint32_t mbh_wp; /*!< write pointer */ -}; - -/** - * internal global state - */ -struct mlog_state { - /* note: tag, mlog_facs, and fac_cnt are in xstate now */ - - int def_mask; /*!< default facility mask value */ - int stderr_mask; /*!< mask above which we send to stderr */ - char *logfile; /*!< logfile name [malloced] */ - int logfd; /*!< fd of the open logfile */ - int oflags; /*!< open flags */ - int fac_alloc; /*!< # of slots in facs[] (>=fac_cnt) */ - unsigned char *mb; /*!< message buffer [malloced] */ - int udpsock; /*!< udp socket for cons output */ - int ucon_cnt; /*!< number of UDP output targets */ - int ucon_nslots; /*!< # of malloc'd entries in ucons[] */ - struct sockaddr_in *ucons; /*!< UDP output targets [malloced] */ - struct utsname uts; /*!< for hostname, from uname(3) */ - void (*abort_hook)(void); /*!< abort hook for mlog_abort() */ - int stdout_isatty; /*!< non-zero if stdout is a tty */ - int stderr_isatty; /*!< non-zero if stderr is a tty */ -#ifdef MLOG_MUTEX - pthread_mutex_t mlogmux; /*!< protect mlog in threaded env */ -#endif -}; - -/* - * global data. this sets mlog_xst.tag to 0, meaning the log is not open. - * this is global so mlog_filter() in mlog.h can get at it. - */ -struct mlog_xstate mlog_xst = { 0 }; - -/* - * static data. 
- */ -static struct mlog_state mst = { 0 }; -static int mlogsyslog[] = { - LOG_DEBUG, /* MLOG_DBUG */ - LOG_INFO, /* MLOG_INFO */ - LOG_NOTICE, /* MLOG_NOTE */ - LOG_WARNING, /* MLOG_WARN */ - LOG_ERR, /* MLOG_ERR */ - LOG_CRIT, /* MLOG_CRIT */ - LOG_ALERT, /* MLOG_ALERT */ - LOG_EMERG, /* MLOG_EMERG */ -}; -static const char *default_fac0name = "MLOG"; /* default name for facility 0 */ - -/* - * macros - */ -#ifdef MLOG_MUTEX -#define mlog_lock() pthread_mutex_lock(&mst.mlogmux) -#define mlog_unlock() pthread_mutex_unlock(&mst.mlogmux) -#else -#define mlog_lock() /* nothing */ -#define mlog_unlock() /* nothing */ -#endif - -/* - * local prototypes - */ -static void mlog_getmbptrs(char **, int *, char **, int *); -static void mlog_dmesg_mbuf(char **b1p, int *b1len, char **b2p, int *b2len); -static int mlog_getucon(int, struct sockaddr_in *, char *); -static const char *mlog_pristr(int); -static int mlog_resolvhost(struct sockaddr_in *, char *, char *); -static int mlog_setnfac(int); -static uint32_t wswap(uint32_t); -static void vmlog(int, const char *, va_list); - -/* - * local helper functions - */ - -/** - * mlog_getmbptrs: get pointers to the message buffer and their sizes, - * based on the current value of the write pointer. if the write pointer - * is zero, then the entire buffer is in one and twolen is set to zero. - * note that the _caller_ must check that mst.mb is valid, we assume it is. - * caller must hold mlog_lock. 
- * - * @param one pointer to first part of circular buffer - * @param onelen length of the one buffer - * @param two pointer to second part of circular buffer, if any - * @param twolen length of the two buffer - */ -static void mlog_getmbptrs(char **one, int *onelen, char **two, int *twolen) -{ - uint32_t wp; - wp = ((struct mlog_mbhead *)mst.mb)->mbh_wp; - *one = ((char *) mst.mb) + sizeof(struct mlog_mbhead) + wp; - *onelen = ((struct mlog_mbhead *)mst.mb)->mbh_len - wp; - *two = ((char *) mst.mb) + sizeof(struct mlog_mbhead); - *twolen = wp; -} - -/** - * mlog_dmesg_mbuf: obtain pointers to the current contents of the - * message buffer. since the message buffer is circular, the result - * may come back in two pieces. - * caller must hold mlog_lock. - * - * @param b1p returns pointer to first buffer here - * @param b1len returns length of data in b1 - * @param b2p returns pointer to second buffer here (null if none) - * @param b2len returns length of b2 or zero if b2 is null - */ -static void mlog_dmesg_mbuf(char **b1p, int *b1len, char **b2p, int *b2len) -{ - uint32_t skip; - /* get pointers */ - mlog_getmbptrs(b1p, b1len, b2p, b2len); - /* if the buffer wasn't full, we need to adjust the pointers */ - skip = ((struct mlog_mbhead *)mst.mb)->mbh_len - - ((struct mlog_mbhead *)mst.mb)->mbh_cnt; - if (skip >= *b1len) { /* skip entire first buffer? */ - skip -= *b1len; - *b1p = *b2p; - *b2p = 0; - *b1len = *b2len; - *b2len = 0; - } - if (skip) { - *b1p = *b1p + skip; - *b1len = *b1len - skip; - } - return; -} - -/** - * mlog_getucon: helper function that parses hostname/port info from - * a string into an array of sockaddr_in structures. the string format - * is: host1:port1;host2:port2;host3:port3 ... - * does not access global mlog state. 
- * - * @param cnt number of sockaddr_in structures alloced for ads[] array - * @param ads an array of sockaddr_in structures we fill in - * @param dcon text string to load ucon info from - * @return number of host/port entries resolved. - */ -static int mlog_getucon(int cnt, struct sockaddr_in *ads, char *dcon) -{ - int rv; - char *p, *hst, *col, *port; - p = dcon; - rv = 0; - while (*p && rv < cnt) { - hst = p; - while (*p && *p != ':') { - p++; - } - if (*p != ':') { - fprintf(stderr, "MLOG_UCON: parse error: missing ':'\n"); - break; - } - col = p++; - port = p; - while (*p && *p != ';') { - p++; - } - if (*p == ';') { - p++; - } - *col = 0; - if (mlog_resolvhost(&ads[rv], hst, port) != -1) { - rv++; - } - *col = ':'; - } - return(rv); -} - -/* - * static arrays for converting between pri's and strings - */ -static const char *norm[] = { "DBUG", "INFO", "NOTE", "WARN", - "ERR ", "CRIT", "ALRT", "EMRG" - }; -static const char *dbg[] = { "D---", "D3--", "D2--", "D23-", - "D1--", "D13-", "D12-", "D123", - "D0--", "D03-", "D02-", "D023", - "D01-", "D013", "D012", "DBUG" - }; -/** - * mlog_pristr: convert priority to 4 byte symbolic name. - * does not access mlog global state. - * - * @param pri the priority to convert to a string - * @return the string (symbolic name) of the priority - */ -static const char *mlog_pristr(int pri) -{ - int s; - pri = pri & MLOG_PRIMASK; /* be careful */ - s = (pri >> MLOG_PRISHIFT) & 7; - if (s) { - return(norm[s]); - } - s = (pri >> MLOG_DPRISHIFT) & 15; - return(dbg[s]); -} - -/** - * mlog_resolvhost: another helper function that does hostname resolution. - * does not access mlog global state. - * - * @param sinp pointer to sockaddr_in that we fill in based on h/p - * @param h hostname to resolve; also handles IP address - * @param p port number - * @return -1 on error, 0 otherwise. 
- */ -static int mlog_resolvhost(struct sockaddr_in *sinp, char *h, char *p) -{ - struct hostent *he; - memset(sinp, 0, sizeof(*sinp)); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(atoi(p)); - if (*h >= '0' && *h <= '9') { - sinp->sin_addr.s_addr = inet_addr(h); - if (sinp->sin_addr.s_addr == 0 || - sinp->sin_addr.s_addr == ((in_addr_t) -1)) { - fprintf(stderr, "MLOG_UCON: invalid host %s\n", h); - return(-1); - } - } else { - he = gethostbyname(h); /* likely to block here */ - if (!he || he->h_addrtype != AF_INET || - he->h_length != sizeof(in_addr_t) || !he->h_addr) { - fprintf(stderr, "MLOG_UCON: invalid host %s\n", h); - return(-1); - } - memcpy(&sinp->sin_addr.s_addr, he->h_addr, he->h_length); - } - return(0); -} - -/** - * mlog_setnfac: set the number of facilites allocated (including default - * to a given value). mlog must be open for this to do anything. - * we set the default name for facility 0 here. - * caller must hold mlog_lock. - * - * @param n the number of facilities to allocate space for now. - * @return -1 on error. - */ -static int mlog_setnfac(int n) -{ - int try, lcv; - struct mlog_fac *nfacs; - - /* - * no need to check mlog_xst.tag to see if mlog is open or not, - * since caller holds mlog_lock already it must be ok. - */ - - /* hmm, already done */ - if (n <= mlog_xst.fac_cnt) { - return(0); - } - /* can we expand in place? */ - if (n <= mst.fac_alloc) { - mlog_xst.fac_cnt = n; - return(0); - } - /* must grow the array */ - try = (n < 1024) ? (n + 32) : n; /* pad a bit for small values of n */ - nfacs = calloc(1, try * sizeof(*nfacs)); - if (!nfacs) { - return(-1); - } - /* over the hump, setup the new array */ - lcv = 0; - if (mlog_xst.mlog_facs && mlog_xst.fac_cnt) { /* copy old? 
*/ - for (/*null*/ ; lcv < mlog_xst.fac_cnt ; lcv++) { - nfacs[lcv] = mlog_xst.mlog_facs[lcv]; /* struct copy */ - } - } - for (/*null*/ ; lcv < try ; lcv++) { /* init the new */ - nfacs[lcv].fac_mask = mst.def_mask; - nfacs[lcv].fac_aname = (lcv == 0) ? (char *)default_fac0name : NULL; - nfacs[lcv].fac_lname = NULL; - } - /* install */ - if (mlog_xst.mlog_facs) { - free(mlog_xst.mlog_facs); - } - mlog_xst.mlog_facs = nfacs; - mlog_xst.fac_cnt = n; - mst.fac_alloc = try; - return(0); -} - -/** - * mlog_bput: copy a string to a buffer, counting the bytes - * - * @param bpp pointer to output pointer (we advance it) - * @param skippy pointer to bytes to skip - * @param residp pointer to length of buffer remaining - * @param totcp pointer to total bytes moved counter - * @param str the string to copy in (null to just add a \0) - */ -static void mlog_bput(char **bpp, int *skippy, int *residp, int *totcp, - const char *str) -{ - static const char *nullsrc = "X\0\0"; /* 'X' is a non-null dummy char */ - const char *sp; - if (str == NULL) { /* trick to allow a null insert */ - str = nullsrc; - } - for (sp = str ; *sp ; sp++) { - if (sp == nullsrc) { - sp++; /* skip over 'X' to null */ - } - if (totcp) { - (*totcp)++; /* update the total */ - } - if (skippy && *skippy > 0) { - (*skippy)--; /* honor skip */ - continue; - } - if (*residp > 0 && *bpp != NULL) { /* copyout if buffer w/space */ - **bpp = *sp; - (*bpp)++; - (*residp)--; - } - } - return; -} - -/** - * wswap: swap byte order of a 32bit word - * does not access mlog global state. - * - * @param w the 32bit int to swap - * @return the swapped version of the "w" - */ -static uint32_t wswap(uint32_t w) -{ - return( (w >> 24) | - ((w >> 16) & 0xff) << 8 | - ((w >> 8) & 0xff) << 16 | - ((w ) & 0xff) << 24 ); -} - -/** - * mlog_cleanout: release previously allocated resources (e.g. from a - * close or during a failed open). 
this function assumes the mlogmux - * has been allocated (caller must ensure that this is true or we'll - * die when attempting a mlog_lock()). we will dispose of mlogmux. - * (XXX: might want to switch over to a PTHREAD_MUTEX_INITIALIZER for - * mlogmux at some point?). - * - * the caller handles cleanout of mlog_xst.tag (not us). - */ -static void mlog_cleanout() -{ - int lcv; - mlog_lock(); - if (mst.logfile) { - if (mst.logfd >= 0) { - close(mst.logfd); - } - mst.logfd = -1; - free(mst.logfile); - mst.logfile = NULL; - } - if (mlog_xst.mlog_facs) { - /* - * free malloced facility names, being careful not to free - * the static default_fac0name.... - */ - for (lcv = 0 ; lcv < mst.fac_alloc ; lcv++) { - if (mlog_xst.mlog_facs[lcv].fac_aname && - mlog_xst.mlog_facs[lcv].fac_aname != default_fac0name) { - free(mlog_xst.mlog_facs[lcv].fac_aname); - } - if (mlog_xst.mlog_facs[lcv].fac_lname) { - free(mlog_xst.mlog_facs[lcv].fac_lname); - } - } - free(mlog_xst.mlog_facs); - mlog_xst.mlog_facs = NULL; - mlog_xst.fac_cnt = mst.fac_alloc = 0; - } - if (mst.mb) { - free(mst.mb); - mst.mb = NULL; - } - if (mst.udpsock >= 0) { - close(mst.udpsock); - mst.udpsock = -1; - } - if (mst.ucons) { - free(mst.ucons); - mst.ucons = NULL; - } - if (mst.oflags & MLOG_SYSLOG) { - closelog(); - } - mlog_unlock(); -#ifdef MLOG_MUTEX - pthread_mutex_destroy(&mst.mlogmux); -#endif -} - - -/** - * vmlog: core log function, front-ended by mlog/mlog_abort/mlog_exit. - * we vsnprintf the message into a holding buffer to format it. then we - * send it to all target output logs. the holding buffer is set to - * MLOG_TBSIZ, if the message is too long it will be silently truncated. - * caller should not hold mlog_lock, vmlog will grab it as needed. 
- * - * @param flags the flags (mainly fac+pri) for this log message - * @param fmt the printf(3) format to use - * @param ap the stdargs va_list to use for the printf format - */ -static void vmlog(int flags, const char *fmt, va_list ap) -{ -#define MLOG_TBSIZ 4096 /* bigger than any line should be */ - int fac, lvl, msk; - char b[MLOG_TBSIZ], *bp, *b_nopt1hdr; - char facstore[16], *facstr; - struct timeval tv; - struct tm *tm; - int hlen_pt1, hlen, mlen, tlen, thisflag; - int resid; - char *m1, *m2; - int m1len, m2len, ncpy; - //since we ignore any potential errors in MLOG let's always re-set - //errno to its orginal value - int save_errno = errno; - struct mlog_mbhead *mb; - /* - * make sure the mlog is open - */ - if (!mlog_xst.tag) { - return; - } - /* - * first, see if we can ignore the log messages because it is - * masked out. if debug messages are masked out, then we just - * directly compare levels. if debug messages are not masked, - * then we allow all non-debug messages and for debug messages we - * check to make sure the proper bit is on. [apps that don't use - * the debug bits just log with MLOG_DBG which has them all set] - */ - fac = flags & MLOG_FACMASK; - lvl = flags & MLOG_PRIMASK; - /* convert unknown facilities to default so we don't drop log msg */ - if (fac >= mlog_xst.fac_cnt) { - fac = 0; - } - msk = mlog_xst.mlog_facs[fac].fac_mask; - if (lvl >= MLOG_INFO) { /* normal mlog message */ - if (lvl < msk) { - errno = save_errno; - return; /* skip it */ - } - if (mst.stderr_mask != 0 && lvl >= mst.stderr_mask) { - flags |= MLOG_STDERR; - } - } else { /* debug mlog message */ - /* - * note: if (msk >= MLOG_INFO), then all the mask's debug bits - * are zero (meaning debugging messages are masked out). thus, - * for messages with the debug level we only have to do a bit - * test. - */ - if ((lvl & msk) == 0) { /* do we want this type of debug msg? */ - errno = save_errno; - return; /* no! 
*/ - } - if ((lvl & mst.stderr_mask) != 0) { /* same thing for stderr_mask */ - flags |= MLOG_STDERR; - } - } - /* - * we must log it, start computing the parts of the log we'll need. - */ - mlog_lock(); /* lock out other threads */ - if (mlog_xst.mlog_facs[fac].fac_aname) { - facstr = mlog_xst.mlog_facs[fac].fac_aname; - } else { - snprintf(facstore, sizeof(facstore), "%d", fac); - facstr = facstore; - } - (void) gettimeofday(&tv, 0); - tm = localtime(&tv.tv_sec); - thisflag = (mst.oflags | flags); - /* - * ok, first, put the header into b[] - */ - hlen = snprintf(b, sizeof(b), - "%04d/%02d/%02d-%02d:%02d:%02d.%02ld %s %s ", - tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec, - (long int)tv.tv_usec / 10000, mst.uts.nodename, - mlog_xst.tag); - hlen_pt1 = hlen; /* save part 1 length */ - if (hlen < sizeof(b)) { - hlen += snprintf(b + hlen, sizeof(b) - hlen, "%-4s %s ", - facstr, mlog_pristr(lvl)); - } - /* - * we expect there is still room (i.e. at least one byte) for a - * message, so this overflow check should never happen, but let's - * check for it anyway. - */ - if (hlen + 1 >= sizeof(b)) { - mlog_unlock(); /* drop lock, this is the only early exit */ - fprintf(stderr, "mlog: header overflowed %zd byte buffer (%d)\n", - sizeof(b), hlen + 1); - errno = save_errno; - return; - } - /* - * now slap in the user's data at the end of the buffer - */ - mlen = vsnprintf(b + hlen, sizeof(b) - hlen, fmt, ap); - /* - * compute total length, check for overflows... make sure the string - * ends in a newline. - */ - tlen = hlen + mlen; - /* if overflow or totally full without newline at end ... */ - if (tlen >= sizeof(b) || - (tlen == sizeof(b) - 1 && b[sizeof(b)-2] != '\n') ) { - tlen = sizeof(b) - 1; /* truncate, counting final null */ - /* - * could overwrite the end of b with "[truncated...]" or - * something like that if we wanted to note the problem. 
- */ - b[sizeof(b)-2] = '\n'; /* jam a \n at the end */ - } else { - /* it fit, make sure it ends in newline */ - if (b[tlen - 1] != '\n') { - b[tlen++] = '\n'; - b[tlen] = 0; - } - } - b_nopt1hdr = b + hlen_pt1; - /* - * multilog message is now ready to be dispatched. - */ - /* - * 1: log it to the message buffer (note: mlog still locked) - */ - mb = (struct mlog_mbhead *)mst.mb; - if (mb) { - resid = tlen; - bp = b; - /* wont fit? truncate... */ - if (resid > mb->mbh_len) { - bp = b + resid - mb->mbh_len; - resid = mb->mbh_len; - } - mlog_getmbptrs(&m1, &m1len, &m2, &m2len); - ncpy = resid; - if (ncpy > m1len) { - ncpy = m1len; - } - memcpy(m1, bp, ncpy); - resid -= ncpy; - if (resid) { - bp += ncpy; - memcpy(m2, bp, resid); - } - /* update write pointer */ - if (tlen < mb->mbh_len) { - mb->mbh_wp += tlen; - if (mb->mbh_wp >= mb->mbh_len) { - mb->mbh_wp -= mb->mbh_len; - } - } - if (mb->mbh_cnt < mb->mbh_len) { - mb->mbh_cnt += tlen; - if (mb->mbh_cnt > mb->mbh_len) { - mb->mbh_cnt = mb->mbh_len; - } - } - } - /* - * locking options: b[] is current an auto var on the stack. - * this costs stack space, but means we can unlock earlier. - * (if b[] was static, you'd hold the lock until the end.) - * clearly the logfile needs mst.logfd to be stable, and the - * UCON walks the mst.ucons[] array... - * - * neither the stderr/out or syslog access parts of mst that - * change, so we don't really need it locked for that? 
- */ - /* - * 2: log it to the log file - */ - if (mst.logfd >= 0) { - if (write(mst.logfd, b, tlen) != 0) - fprintf(stderr, "mlog: Failed to write(): %s", strerror(errno)); - } - /* - * 3: log it to the UCONs (UDP console) [mst.oflags' MLOG_UCON_ON bit - * can only be set if there is a valid mst.udpsock open] - */ - if (mst.oflags & MLOG_UCON_ON) { - for (ncpy = 0 ; ncpy < mst.ucon_cnt ; ncpy++) - (void) sendto(mst.udpsock, b, tlen, 0, - (struct sockaddr *)&mst.ucons[ncpy], - sizeof(mst.ucons[ncpy])); - } - mlog_unlock(); /* drop lock here */ - /* - * 4: log it to stderr and/or stdout. skip part one of the header - * if the output channel is a tty - */ - if (thisflag & MLOG_STDERR) { - if (mst.stderr_isatty) { - fprintf(stderr, "%s", b_nopt1hdr); - } else { - fprintf(stderr, "%s", b); - } - } - if (thisflag & MLOG_STDOUT) { - if (mst.stderr_isatty) { - printf("%s", b_nopt1hdr); - } else { - printf("%s", b); - } - fflush(stdout); - } - /* - * 5: log it to syslog - */ - if (mst.oflags & MLOG_SYSLOG) { - b[tlen - 1] = 0; /* syslog doesn't want the \n */ - syslog(mlogsyslog[lvl >> MLOG_PRISHIFT], "%s", b_nopt1hdr); - b[tlen - 1] = '\n'; /* put \n back, just to be safe */ - } - /* - * done! - */ - errno = save_errno; - return; -} - -/* - * API functions - */ - -/* - * mlog_str2pri: convert a priority string to an int pri value to allow - * for more user-friendly programs. returns -1 (an invalid pri) on error. - * does not access mlog global state. 
- */ -int mlog_str2pri(const char *pstr) -{ - char ptmp[8]; - int lcv; - /* make sure we have a valid input */ - if (strlen(pstr) > 5) { - return(-1); - } - strcpy(ptmp, pstr); /* because we may overwrite parts of it */ - /* - * handle some quirks - */ - if (strcasecmp(ptmp, "ERR") == 0) { /* has trailing space in the array */ - return(MLOG_ERR); - } - if (strcasecmp(ptmp, "DEBUG") == 0) { /* 5 char alternative to 'DBUG' */ - return(MLOG_DBG); - } - if (ptmp[0] == 'D') { /* allow shorthand without the '-' chars */ - while (strlen(ptmp) < 4) { - strcat(ptmp, "-"); - } - } - /* - * do non-debug first, then debug - */ - for (lcv = 1 ; lcv <= 7 ; lcv++) { - if (strcasecmp(ptmp, norm[lcv]) == 0) { - return(lcv << MLOG_PRISHIFT); - } - } - for (lcv = 0 ; lcv < 16 ; lcv++) { - if (strcasecmp(ptmp, dbg[lcv]) == 0) { - return(lcv << MLOG_DPRISHIFT); - } - } - /* bogus! */ - return(-1); -} - -/* - * mlog_open: open a multilog (uses malloc, inits global state). you - * can only have one multilog open at a time, but you can use multiple - * facilities. - * - * if an mlog is already open, then this call will fail. if you use - * the message buffer, you prob want it to be 1K or larger. - * - * return 0 on success, -1 on error. 
- */ -int mlog_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, - char *logfile, int msgbuf_len, int flags, int syslogfac) -{ - int tagblen; - char *newtag, *dcon, *cp; - struct mlog_mbhead *mb; - /* quick sanity check (mst.tag is non-null if already open) */ - if (mlog_xst.tag || !tag || - (maxfac_hint < 0) || (default_mask & ~MLOG_PRIMASK) || - (stderr_mask & ~MLOG_PRIMASK) || - (msgbuf_len < 0) || (msgbuf_len > 0 && msgbuf_len < 16)) { - return(-1); - } - /* init working area so we can use mlog_cleanout to bail out */ - memset(&mst, 0, sizeof(mst)); - mst.logfd = mst.udpsock = -1; - /* start filling it in */ - tagblen = strlen(tag) + MLOG_TAGPAD; /* add a bit for pid */ - newtag = calloc(1, tagblen); - if (!newtag) { - return(-1); - } -#ifdef MLOG_MUTEX /* create lock */ - if (pthread_mutex_init(&mst.mlogmux, NULL) != 0) { - /* XXX: consider cvt to PTHREAD_MUTEX_INITIALIZER */ - free(newtag); - return(-1); - } -#endif - /* it is now safe to use mlog_cleanout() for error handling */ - - mlog_lock(); /* now locked */ - if (flags & MLOG_LOGPID) { - snprintf(newtag, tagblen, "%s[%d]", tag, getpid()); - } else { - snprintf(newtag, tagblen, "%s", tag); - } - mst.def_mask = default_mask; - mst.stderr_mask = stderr_mask; - if (logfile) { - mst.logfile = strdup(logfile); - if (!mst.logfile) { - goto error; - } - mst.logfd = open(mst.logfile, O_RDWR|O_APPEND|O_CREAT, 0666); - if (mst.logfd < 0) { - fprintf(stderr, "mlog_open: cannot open %s: %s\n", - mst.logfile, strerror(errno)); - goto error; - } - } - /* - * save setting of MLOG_SYSLOG and MLOG_UCON_ON bits until these - * features are actually enabled. this allows us to use - * mlog_close() to clean up after us if we encounter an error. - */ - mst.oflags = (flags & ~(MLOG_SYSLOG|MLOG_UCON_ON)); - /* maxfac_hint should include default fac. */ - if (mlog_setnfac((maxfac_hint < 1) ? 
1 : maxfac_hint) < 0) { - goto error; - } - if (msgbuf_len) { - mst.mb = calloc(1, msgbuf_len + sizeof(struct mlog_mbhead)); - if (!mst.mb) { - goto error; - } - mb = (struct mlog_mbhead *)mst.mb; - memcpy(mb, MBH_START, sizeof(mb->mbh_start)); - mb->mbh_beef = 0xdeadbeef; - mb->mbh_len = msgbuf_len; - mb->mbh_cnt = 0; - mb->mbh_wp = 0; - } - if (flags & MLOG_UCON_ON) { - mst.udpsock = socket(PF_INET, SOCK_DGRAM, 0); - if (mst.udpsock < 0) { - goto error; - } - mst.oflags |= MLOG_UCON_ON; - /* note that mst.{ucon_nslots,ucon_cnt,ucons} are all 0 */ - } - if ((flags & MLOG_UCON_ENV) != 0 && (dcon = getenv("MLOG_UCON")) != 0) { - for (mst.ucon_cnt = 1, cp = dcon ; *cp ; cp++) { - if (*cp == ';') { - mst.ucon_cnt++; - } - } - mst.ucons = calloc(1, mst.ucon_cnt * sizeof(*mst.ucons)); - if (!mst.ucons) { - goto error; - } - mst.ucon_nslots = mst.ucon_cnt; - mst.ucon_cnt = mlog_getucon(mst.ucon_cnt, mst.ucons, dcon); - /* - * note that it is possible to load ucons but still have the - * console disabled (e.g. !UCON_ON && UCON_ENV). in that case - * the program may enable ucon later via mlog_ucon_on(). - */ - } - (void) uname(&mst.uts); - mlog_xst.nodename = mst.uts.nodename; /* expose this */ - /* chop off the domainname */ - if ((flags & MLOG_FQDN) == 0) { - for (cp = mst.uts.nodename ; *cp && *cp != '.' ; cp++) - /*null*/; - *cp = 0; - } - /* cache value of isatty() to avoid extra system calls */ - mst.stdout_isatty = isatty(fileno(stdout)); - mst.stderr_isatty = isatty(fileno(stderr)); - /* - * log now open! - */ - if (flags & MLOG_SYSLOG) { - openlog(tag, (flags & MLOG_LOGPID) ? LOG_PID : 0, syslogfac); - mst.oflags |= MLOG_SYSLOG; - } - mlog_xst.tag = newtag; - mlog_unlock(); - return(0); -error: - /* - * we failed. mlog_cleanout can handle the cleanup for us. - */ - free(newtag); /* was never installed */ - mlog_unlock(); - mlog_cleanout(); - return(-1); -} - -/* - * mlog_reopen: reopen a multilog. reopen logfile for rotation or - * after a fork... 
update ucon and pid in tag (if enabled). - */ -int mlog_reopen(char *logfile) -{ - int rv; - char *oldpid, *sdup; - if (!mlog_xst.tag) { - return(-1); /* log wasn't open in the first place */ - } - rv = 0; - mlog_lock(); /* lock it down */ - /* reset ucon if open */ - if (mst.oflags & MLOG_UCON_ON) { - if (mst.udpsock >= 0) { - close(mst.udpsock); - } - mst.udpsock = socket(PF_INET, SOCK_DGRAM, 0); - if (mst.udpsock == -1) { - mst.oflags &= ~MLOG_UCON_ON; /* unlikely */ - } - } - /* - * refresh the pid - mlog_open pads the tag such that we cannot - * overflow by snprinting an int pid here... - */ - if ((mst.oflags & MLOG_LOGPID) != 0 && - (oldpid = strrchr(mlog_xst.tag, '[')) != NULL) { - snprintf(oldpid, MLOG_TAGPAD, "[%d]", getpid()); - } - if (mst.logfd >= 0) { - (void) close(mst.logfd); - } - mst.logfd = -1; - /* now the log file */ - if (logfile == NULL) { /* don't want a log file */ - if (mst.logfile) { /* flush out any old stuff */ - free(mst.logfile); - mst.logfile = NULL; - } - } else if (logfile[0] != '\0' && - (mst.logfile == NULL || strcmp(mst.logfile, logfile) != 0)) { - /* - * we are here if we have a new logfile name requested and it - * different from what was there before, so we need to malloc a - * new mst.logfile. - */ - sdup = strdup(logfile); - if (sdup == NULL) { - fprintf(stderr, "mlog_reopen: out of memory - strdup(%s)\n", - logfile); - /* XXX: what else can we do? */ - rv = -1; - goto done; - } - if (mst.logfile) { - free(mst.logfile); /* dump the old one, if present */ - } - mst.logfile = sdup; /* install the new one */ - } - if (mst.logfile) { - mst.logfd = open(mst.logfile, O_RDWR|O_APPEND|O_CREAT, 0666); - if (mst.logfd < 0) { - fprintf(stderr, "mlog_reopen: cannot reopen logfile %s: %s\n", - mst.logfile, strerror(errno)); - rv = -1; - } - } -done: - mlog_unlock(); - return(rv); -} - -/* - * mlog_close: close off an mlog and release any allocated resources - * (e.g. 
as part of an orderly shutdown, after all worker threads have - * been collected). if already closed, this function is a noop. - */ -void mlog_close() -{ - if (!mlog_xst.tag) { - return; /* return if already closed */ - } - free(mlog_xst.tag); - mlog_xst.tag = NULL; /* marks us as down */ - mlog_cleanout(); -} - -/* - * mlog_namefacility: assign a name to a facility - * return 0 on success, -1 on error (malloc problem). - */ -int mlog_namefacility(int facility, char *aname, char *lname) -{ - int rv; - char *n, *nl; - /* not open? */ - if (!mlog_xst.tag) { - return(-1); - } - rv = -1; /* assume error */ - mlog_lock(); - /* need to allocate facility? */ - if (facility >= mlog_xst.fac_cnt) { - if (mlog_setnfac(facility+1) < 0) { - goto done; - } - } - n = 0; - nl = 0; - if (aname) { - n = strdup(aname); - if (!n) { - goto done; - } - if (lname && (nl = strdup(lname)) == NULL) { - free(n); - goto done; - } - } - if (mlog_xst.mlog_facs[facility].fac_aname && - mlog_xst.mlog_facs[facility].fac_aname != default_fac0name) { - free(mlog_xst.mlog_facs[facility].fac_aname); - } - if (mlog_xst.mlog_facs[facility].fac_lname) { - free(mlog_xst.mlog_facs[facility].fac_lname); - } - mlog_xst.mlog_facs[facility].fac_aname = n; - mlog_xst.mlog_facs[facility].fac_lname = nl; - rv = 0; /* now we have success */ -done: - mlog_unlock(); - return(rv); -} - -/* - * mlog_allocfacility: allocate a new facility with the given name. - * return new facility number on success, -1 on error (malloc problem). - */ -int mlog_allocfacility(char *aname, char *lname) -{ - int newfac; - /* not open? */ - if (!mlog_xst.tag) { - return(-1); - } - mlog_lock(); - newfac = mlog_xst.fac_cnt; - if (mlog_setnfac(newfac+1) < 0) { - newfac = -1; - } - mlog_unlock(); - if (newfac == -1 || mlog_namefacility(newfac, aname, lname) < 0) { - return(-1); - } - return(newfac); -} - -/* - * mlog_setlogmask: set the logmask for a given facility. 
if the user - * uses a new facility, we ensure that our facility array covers it - * (expanding as needed). return oldmask on success, -1 on error. cannot - * fail if facility array was preallocated. - */ -int mlog_setlogmask(int facility, int mask) -{ - int oldmask; - /* not open? */ - if (!mlog_xst.tag) { - return(-1); - } - mlog_lock(); - /* need to allocate facility? */ - if (facility >= mlog_xst.fac_cnt && mlog_setnfac(facility+1) < 0) { - oldmask = -1; /* error */ - } else { - /* swap it in, masking out any naughty bits */ - oldmask = mlog_xst.mlog_facs[facility].fac_mask; - mlog_xst.mlog_facs[facility].fac_mask = (mask & MLOG_PRIMASK); - } - mlog_unlock(); - return(oldmask); -} - -/* - * mlog_setmasks: set the mlog masks for a set of facilities to a given - * level. the input string should look: PREFIX1=LEVEL1,PREFIX2=LEVEL2,... - * if the "PREFIX=" part is omitted, then the level applies to all defined - * facilities (e.g. mlog_setmasks("WARN") sets everything to WARN). - */ -void mlog_setmasks(char *mstr, int mlen0) -{ - char *m, *current, *fac, *pri, pbuf[8]; - int mlen, facno, clen, elen, faclen, prilen, prino; - /* not open? */ - if (!mlog_xst.tag) { - return; - } - m = mstr; - mlen = mlen0; - if (mlen < 0) { - mlen = strlen(mstr); - } - while (mlen > 0 && (*m == ' ' || *m == '\t')) { /* remove leading space */ - m++; - mlen--; - } - if (mlen <= 0) { - return; /* nothing doing */ - } - facno = 0; /* make sure it gets init'd */ - while (m) { - /* note current chunk, and advance m to the next one */ - current = m; - for (clen = 0 ; clen < mlen && m[clen] != ',' ; clen++) { - /*null*/; - } - if (clen < mlen) { - m = m + clen + 1; /* skip the comma too */ - mlen = mlen - (clen + 1); - } else { - m = NULL; - mlen = 0; - } - if (clen == 0) { - continue; /* null entry, just skip it */ - } - for (elen = 0 ; elen < clen && current[elen] != '=' ; elen++) { - /*null*/; - } - if (elen < clen) { /* has a facility prefix? 
*/ - fac = current; - faclen = elen; - pri = current + elen + 1; - prilen = clen - (elen + 1); - } else { - fac = NULL; /* means we apply to all facs */ - faclen = 0; - pri = current; - prilen = clen; - } - if (m == NULL) { - /* remove trailing white space from count */ - while (prilen > 0 && (pri[prilen-1] == '\n' || - pri[prilen-1] == ' ' || - pri[prilen-1] == '\t') ) { - prilen--; - } - } - /* parse complete! */ - /* process priority */ - if (prilen > 5) { /* we know it can't be longer than this */ - prino = -1; - } else if (prilen < 0) { /* This if() block gets rid of a */ - prino = -1; /* compiler warning. */ - } else { - memset(pbuf, 0, sizeof(pbuf)); - strncpy(pbuf, pri, prilen); - prino = mlog_str2pri(pbuf); - } - if (prino == -1) { - mlog(MLOG_ERR, "mlog_setmasks: %.*s: unknown priority %.*s", - faclen, fac, prilen, pri); - continue; - } - /* process facility */ - if (fac) { - mlog_lock(); - for (facno = 0 ; facno < mlog_xst.fac_cnt ; facno++) { - if (mlog_xst.mlog_facs[facno].fac_aname && - strlen(mlog_xst.mlog_facs[facno].fac_aname) == faclen && - strncasecmp(mlog_xst.mlog_facs[facno].fac_aname, fac, - faclen) == 0) { - break; - } - if (mlog_xst.mlog_facs[facno].fac_lname && - strlen(mlog_xst.mlog_facs[facno].fac_lname) == faclen && - strncasecmp(mlog_xst.mlog_facs[facno].fac_lname, fac, - faclen) == 0) { - break; - } - } - mlog_unlock(); - if (facno >= mlog_xst.fac_cnt) { - mlog(MLOG_ERR, "mlog_setmasks: unknown facility %.*s", - faclen, fac); - continue; - } - } - if (fac) { - /* apply only to this fac */ - mlog_setlogmask(facno, prino); - } else { - /* apply to all facilities */ - for (facno = 0 ; facno < mlog_xst.fac_cnt ; facno++) { - mlog_setlogmask(facno, prino); - } - } - } -} - -/* - * mlog_getmasks: get current masks levels - */ -int mlog_getmasks(char *buf, int discard, int len, int unterm) -{ - char *bp, *myname; - const char *p; - int skipcnt, resid, total, facno; - char store[64]; /* fac unlikely to overflow this */ - /* not open? 
*/ - if (!mlog_xst.tag) { - return(0); - } - bp = buf; - skipcnt = discard; - resid = len; - total = 0; - mlog_lock(); - for (facno = 0 ; facno < mlog_xst.fac_cnt ; facno++) { - if (facno) { - mlog_bput(&bp, &skipcnt, &resid, &total, ","); - } - if (mlog_xst.mlog_facs[facno].fac_lname != NULL) { - myname = mlog_xst.mlog_facs[facno].fac_lname; - } else { - myname = mlog_xst.mlog_facs[facno].fac_aname; - } - if (myname == NULL) { - snprintf(store, sizeof(store), "%d", facno); - mlog_bput(&bp, &skipcnt, &resid, &total, store); - } else { - mlog_bput(&bp, &skipcnt, &resid, &total, myname); - } - mlog_bput(&bp, &skipcnt, &resid, &total, "="); - p = mlog_pristr(mlog_xst.mlog_facs[facno].fac_mask); - store[1] = 0; - while (*p && *p != ' ' && *p != '-') { - store[0] = *p; - p++; - mlog_bput(&bp, &skipcnt, &resid, &total, store); - } - } - mlog_unlock(); - strncpy(store, "\n", sizeof(store)); - mlog_bput(&bp, &skipcnt, &resid, &total, store); - if (unterm == 0) { - mlog_bput(&bp, &skipcnt, &resid, &total, NULL); - } - /* buf == NULL means probe for length ... */ - return((buf == NULL) ? total : len - resid); -} - -/* - * mlog_abort_hook: set mlog abort hook - */ -void *mlog_abort_hook(void (*abort_hook)(void)) -{ - void *ret; - if (mlog_xst.tag) { - mlog_lock(); - ret = mst.abort_hook; /* save old value for return */ - mst.abort_hook = abort_hook; - mlog_unlock(); - } else { - ret = NULL; - } - return(ret); -} - -/* - * mlog_dmesg: obtain pointers to the current contents of the message - * buffer. since the message buffer is circular, the result may come - * back in two pieces. 
- * return 0 on success, -1 on error (not open, or no message buffer) - */ -int mlog_dmesg(char **b1p, int *b1len, char **b2p, int *b2len) -{ - /* first check if we are open and have the buffer */ - if (!mlog_xst.tag || !mst.mb) { - return(-1); - } - mlog_lock(); - mlog_dmesg_mbuf(b1p, b1len, b2p, b2len); - mlog_unlock(); - return(0); -} - -/* - * mlog_mbcount: give a hint as to the current size of the message buffer. - */ -int mlog_mbcount() -{ - struct mlog_mbhead *mb; - int rv; - /* first check if we are open and have the buffer */ - if (!mlog_xst.tag || !mst.mb) { - return(0); - } - mlog_lock(); - rv = 0; - mb = (struct mlog_mbhead *)mst.mb; - if (mb) { - rv = mb->mbh_cnt; - } - mlog_unlock(); - return(rv); -} - -/* - * mlog_mbcopy: safely copy the most recent bytes of the message buffer - * over into another buffer for use. returns # of bytes copied, -1 on - * error. - */ -int mlog_mbcopy(char *buf, int offset, int len) -{ - char *b1, *b2, *bp; - int b1l, b2l, got, want, skip; - if (!buf || len < 1 || !mlog_xst.tag) { - return(-1); - } - if (!mst.mb) { - return(0); /* no message buffer, treat like reading /dev/null? */ - } - mlog_lock(); - mlog_dmesg_mbuf(&b1, &b1l, &b2, &b2l); - /* pull back from the newest data by 'offset' bytes */ - if (offset > 0 && b2l > 0) { - if (offset > b2l) { - offset -= b2l; - b2l = 0; - } else { - b2l -= offset; - offset = 0; - } - } - if (offset > 0 && b1l > 0) { - if (offset > b1l) { - b1l = 0; - } else { - b1l -= offset; - } - } - got = b1l + b2l; /* total bytes in msg buf */ - want = (len > got) ? got : len; /* how many we want, capped by got */ - skip = (want < got) ? 
got - want : 0; /* how many we skip over */ - if (skip) { - if (skip > b1l) { - skip -= b1l; - b1l = 0; - } else { - b1l -= skip; - b1 += skip; - skip = 0; - } - if (skip > b2l) { - b2l = 0; - } else { - b2l -= skip; - b2 += skip; - } - } - bp = buf; - if (b1l) { - memcpy(bp, b1, b1l); - bp += b1l; - } - if (b2l) { - memcpy(bp, b2, b2l); - } - mlog_unlock(); - return(want); -} - -/* XXXCDC: BEGIN TMP */ -/* - * plfs_debug: tmp wrapper - */ -void plfs_debug(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vmlog(MLOG_DBG, fmt, ap); - va_end(ap); -} -/* XXXCDC: END TMP */ - -/* - * mlog: multilog a message... generic wrapper for the the core vmlog - * function. note that a log line cannot be larger than MLOG_TBSZ (4096) - * [if it is larger it will be (silently) truncated]. - */ -void mlog(int flags, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vmlog(flags, fmt, ap); - va_end(ap); -} - -/* - * mlog_abort: like mlog, but prints the stack and does an abort after - * processing the log. for aborts, we always log to STDERR. - */ -void mlog_abort(int flags, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vmlog(flags|MLOG_STDERR, fmt, ap); - va_end(ap); - if (mlog_xst.tag && mst.abort_hook) { /* call hook? */ - mst.abort_hook(); - } - abort(); - /*NOTREACHED*/ -} - -/* - * mlog_exit: like mlog, but exits with the given status after processing - * the log. we always log to STDERR. - */ -void mlog_exit(int status, int flags, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vmlog(flags|MLOG_STDERR, fmt, ap); - va_end(ap); - exit(status); - /*NOTREACHED*/ -} - -/* - * mlog_findmesgbuf: search for a message buffer inside another buffer - * (typically a mmaped core file). does not access mlog global state. 
- * return 0 on success, -1 on error - */ -int mlog_findmesgbuf(char *b, int len, char **b1p, int *b1l, - char **b2p, int *b2l) -{ - char *ptr, *headend, *bufend; - struct mlog_mbhead mb; - uint32_t skip; - ptr = b; /* current pointer */ - bufend = b + len; /* end of buffer */ - headend = bufend - sizeof(struct mlog_mbhead); /* can't start from here */ - for (/*null*/ ; ptr < headend ; ptr += 4) { - if (memcmp(ptr, MBH_START, sizeof(MBH_START) - 1) != 0) { - continue; - } - /* - * might have found it. handle byte order and sanity check it. - */ - memcpy(&mb, ptr, sizeof(mb)); /* make a copy to ensure alignment */ - if (mb.mbh_beef == wswap(0xdeadbeef)) { - mb.mbh_len = wswap(mb.mbh_len); - mb.mbh_cnt = wswap(mb.mbh_cnt); - mb.mbh_wp = wswap(mb.mbh_wp); - } - if (mb.mbh_cnt > mb.mbh_len) { - continue; - } - if (mb.mbh_wp > mb.mbh_len) { - continue; - } - if (ptr + mb.mbh_len > bufend || - ptr + mb.mbh_len < ptr) { - continue; - } - /* - * looks good! - */ - *b1p = ptr + sizeof(mb) + mb.mbh_wp; - *b1l = mb.mbh_len - mb.mbh_wp; - *b2p = ptr + sizeof(mb); - *b2l = mb.mbh_wp; - skip = mb.mbh_len - mb.mbh_cnt; - if (skip > *b1l) { - skip -= *b1l; - *b1p = *b2p; - *b2p = 0; - *b1l = *b2l; - *b2l = 0; - } - if (skip) { - *b1p = *b1p + skip; - *b1l = *b1l - skip; - } - return(0); - } - return(-1); -} - -/* - * mlog_ucon_on: enable ucon (UDP console) - * return 0 on success, -1 on error - */ -int mlog_ucon_on() -{ - /* ensure open before doing stuff */ - if (!mlog_xst.tag) { - return(-1); - } - /* note that mst.ucons/mst.ucon_cnt must already be valid */ - mlog_lock(); - if ((mst.oflags & MLOG_UCON_ON) == 0) { - mst.udpsock = socket(PF_INET, SOCK_DGRAM, 0); - if (mst.udpsock >= 0) { - mst.oflags |= MLOG_UCON_ON; - } - } - mlog_unlock(); - return( ((mst.oflags & MLOG_UCON_ON) != 0) ? 
0 : -1); -} - -/* - * mlog_ucon_off: disable ucon (UDP console) if enabled - * return 0 on success, -1 on error - */ -int mlog_ucon_off() -{ - /* ensure open before doing stuff */ - if (!mlog_xst.tag) { - return(-1); - } - mlog_lock(); - mst.oflags = mst.oflags & ~MLOG_UCON_ON; - if (mst.udpsock >= 0) { - close(mst.udpsock); - } - mst.udpsock = -1; - mlog_unlock(); - return(0); -} - -/* - * mlog_ucon_add: add an endpoint as a ucon - * return 0 on success, -1 on error - */ -int mlog_ucon_add(char *host, int port) -{ - char portstr[8]; - int rv, sz; - void *newbuf; - /* ensure open and sane before doing stuff */ - if (!mlog_xst.tag || port < 1 || port > 65535) { - return(-1); - } - rv = -1; /* assume fail */ - mlog_lock(); - /* grow the array if necessary */ - if (mst.ucon_cnt == mst.ucon_nslots) { - sz = mst.ucon_cnt + 1; - newbuf = calloc(1, sz * sizeof(*mst.ucons)); - if (!newbuf) { - goto done; - } - if (mst.ucons) { - memcpy(newbuf, mst.ucons, mst.ucon_cnt * sizeof(*mst.ucons)); - free(mst.ucons); - } - mst.ucons = newbuf; - mst.ucon_nslots = mst.ucon_cnt + 1; - } - snprintf(portstr, sizeof(portstr), "%d", port); - if (mlog_resolvhost(&mst.ucons[mst.ucon_cnt], host, portstr) < 0) { - goto done; - } - /* got it! */ - mst.ucon_cnt++; - rv = 0; -done: - mlog_unlock(); - return(rv); -} - -/* - * mlog_ucon_rm: remove an ucon endpoint (port in host byte order). - * return 0 on success, -1 on error - */ -int mlog_ucon_rm(char *host, int port) -{ - char portstr[8]; - struct sockaddr_in target; - int rv, lcv; - /* ensure open and sane before doing stuff */ - if (!mlog_xst.tag || port < 1 || port > 65535 || mst.ucon_cnt < 1) { - return(-1); - } - /* resolve the hostname */ - snprintf(portstr, sizeof(portstr), "%d", port); - if (mlog_resolvhost(&target, host, portstr) < 0) { - return(-1); - } - rv = -1; - mlog_lock(); - /* look for it ... 
*/ - for (lcv = 0 ; lcv < mst.ucon_cnt ; lcv++) { - if (memcmp(&target, &mst.ucons[lcv], sizeof(*mst.ucons)) == 0) { - break; - } - } - /* didn't find it ? */ - if (lcv >= mst.ucon_cnt) { - goto done; - } - /* if not the last item in the list, pull that item forward */ - if (lcv < mst.ucon_cnt - 1) { - memcpy(&mst.ucons[lcv], &mst.ucons[mst.ucon_cnt - 1], - sizeof(*mst.ucons)); - } - /* remove last item in list */ - mst.ucon_cnt--; - rv = 0; - /* - * done! - */ -done: - mlog_unlock(); - return(rv); -} - diff --git a/meta/src/Mlog2/mlog2.h b/meta/src/Mlog2/mlog2.h deleted file mode 100644 index f7d1e4fc7..000000000 --- a/meta/src/Mlog2/mlog2.h +++ /dev/null @@ -1,439 +0,0 @@ -/* - * The Self-* Storage System Project - * Copyright (c) 2004-2011, Carnegie Mellon University. - * All rights reserved. - * http://www.pdl.cmu.edu/ (Parallel Data Lab at Carnegie Mellon) - * - * This software is being provided by the copyright holders under the - * following license. By obtaining, using and/or copying this software, - * you agree that you have read, understood, and will comply with the - * following terms and conditions: - * - * Permission to reproduce, use, and prepare derivative works of this - * software is granted provided the copyright and "No Warranty" statements - * are included with all reproductions and derivative works and associated - * documentation. This software may also be redistributed without charge - * provided that the copyright and "No Warranty" statements are included - * in all redistributions. - * - * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS. - * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER - * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED - * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY - * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. 
CARNEGIE - * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT - * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. - * COPYRIGHT HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE - * OR DOCUMENTATION. - */ - -/* - * mlog.h define API for multilog message logging system - * 08-Apr-2004 chuck@ece.cmu.edu - */ - -#ifndef _MLOG_H_ -#define _MLOG_H_ - -/* - * mlog flag values - */ -#define MLOG_STDERR 0x80000000 /* always log to stderr */ -#define MLOG_UCON_ON 0x40000000 /* enable UDP console on mlog_open */ -#define MLOG_UCON_ENV 0x20000000 /* get UCON list from $MLOG_UCON */ -#define MLOG_SYSLOG 0x10000000 /* syslog(3) the messages as well */ -#define MLOG_LOGPID 0x08000000 /* include pid in log tag */ -#define MLOG_FQDN 0x04000000 /* log fully quallified domain name */ -#define MLOG_STDOUT 0x02000000 /* always log to stdout */ -/* spare bits: 0x01000000-0x00800000 */ -#define MLOG_PRIMASK 0x007f0000 /* priority mask */ -#define MLOG_EMERG 0x00700000 /* emergency */ -#define MLOG_ALERT 0x00600000 /* alert */ -#define MLOG_CRIT 0x00500000 /* critical */ -#define MLOG_ERR 0x00400000 /* error */ -#define MLOG_WARN 0x00300000 /* warning */ -#define MLOG_NOTE 0x00200000 /* notice */ -#define MLOG_INFO 0x00100000 /* info */ -#define MLOG_PRISHIFT 20 /* to get non-debug level */ -#define MLOG_DPRISHIFT 16 /* to get debug level */ -#define MLOG_DBG 0x000f0000 /* all debug streams */ -#define MLOG_DBG0 0x00080000 /* debug stream 0 */ -#define MLOG_DBG1 0x00040000 /* debug stream 1 */ -#define MLOG_DBG2 0x00020000 /* debug stream 2 */ -#define MLOG_DBG3 0x00010000 /* debug stream 3 */ -#define MLOG_FACMASK 0x0000ffff /* facility mask */ - -/* - * structures: not really part of the external API, but exposed here - * so we can do the priority filter (mlog_filter) in a macro before - * calling mlog() ... no point evaluating the mlog() args if the - * filter is going to filter log out... 
- */ - -/** - * mlog_fac: facility name and mask info - */ -struct mlog_fac { - int fac_mask; /*!< log level for this facility */ - char *fac_aname; /*!< abbreviated name of this facility [malloced] */ - char *fac_lname; /*!< optional long name of this facility [malloced] */ -}; - -/** - * mlog_xstate: exposed global state... just enough for a level check - */ -struct mlog_xstate { - char *tag; /*!< tag string [malloced] */ - /* note that tag is NULL if mlog is not open/inited */ - struct mlog_fac *mlog_facs; /*!< array of facility info [malloced] */ - int fac_cnt; /*!< # of facilities we are using */ - char *nodename; /*!< pointer to our utsname */ -}; - - -#if defined(__cplusplus) -extern "C" { /* __BEGIN_DECLS */ -#endif - - /* - * API prototypes and inlines - */ - - /** - * mlog_filter: determine if we should log a message, based on its priority - * and the current mask level for the facility. flags is typically a - * constant, so the C optimizer should be able to reduce this inline - * quite a bit. no locking for threaded environment here, as we assume - * fac_cnt can only grow larger and threaded apps will shutdown threads - * before doing a mlog_close. - * - * @param flags the MLOG flags - * @return 1 if we should log, 0 if we should filter - */ - static inline int mlog_filter(int flags) - { - extern struct mlog_xstate mlog_xst; - unsigned int fac, lvl, mask; - /* first, ensure mlog is open */ - if (!mlog_xst.tag) { - return(0); - } - /* get the facility and level of this log message */ - fac = flags & MLOG_FACMASK; - lvl = flags & MLOG_PRIMASK; - /* - * check for valid facility. if it is not valid, then we convert - * it to the default facility because that seems like a better thing - * to do than drop the message. 
- */ - if (fac >= (unsigned)mlog_xst.fac_cnt) { - fac = 0; /* 0 == default facility */ - } - /* now we can get the mask we need */ - mask = mlog_xst.mlog_facs[fac].fac_mask; - /* for non-debug logs we directly compare the mask and level */ - if (lvl >= MLOG_INFO) { - return( (lvl < mask) ? 0 : 1); - } - /* - * for debugging logs, we check the channel mask bits. applications - * that don't use debugging channels always log with all the bits set. - */ - return( (lvl & mask) == 0 ? 0 : 1); - } - - /* XXXCDC: BEGIN TMP */ - - /* - * This is here temporarily because some of the code still calls the old - * plfs_debug. This routine now tranforms the plfs_debug call into an - * mlog call. - */ - - void plfs_debug(const char *fmt, ...); - - /* XXXCDC: END TMP */ - - - /** - * mlog: multilog a message... generic wrapper for the the core vmlog - * function. note that a log line cannot be larger than MLOG_TBSZ (4096) - * [if it is larger it will be (silently) truncated]. facility should - * allocated with mlog_open(), mlog_namefacility(), mlog_allocfacility(), - * or mlog_setlogmask() before being used (or the logs will get converted - * to the default facility, #0). - * - * @param flags facility+level+misc flags - * @param fmt printf-style format string - * @param ... printf-style args - */ - void mlog(int flags, const char *fmt, ...) - __attribute__((__format__(__printf__, 2, 3))); - - /** - * mlog_abort: like mlog, but does an abort after processing the log. - * for aborts, we always log to STDERR. - * - * @param flags facility+level+misc flags - * @param fmt printf-style format string - * @param ... printf-style args - */ - void mlog_abort(int flags, const char *fmt, ...) - __attribute__((__noreturn__, __format__(__printf__, 2, 3))); - - /** - * mlog_abort_hook: establish an abort "hook" to call before doing - * an abort (e.g. a hook to print the stack, or save some debug info). 
- * - * @param hook the abort hook function - * @return the old hook (NULL if there wasn't one) - */ - void *mlog_abort_hook(void (*abort_hook)(void)); - - /** - * mlog_allocfacility: allocate a new facility with the given name - * - * @param aname the abbr. name for the facility - can be null for no name - * @param lname the long name for the new facility - can be null for no name - * @return new facility number on success, -1 on error - malloc problem. - */ - int mlog_allocfacility(char *aname, char *lname); - - /** - * mlog_close: close off an mlog and release any allocated resources. - * if already close, this function is a noop. - */ - void mlog_close(void); - - /** - * mlog_dmesg: obtain pointers to the current contents of the message - * buffer. since the message buffer is circular, the result may come - * back in two pieces. note that this function returns pointers into - * the live message buffer, so the app had best not call mlog again - * until it is done with the pointers. - * - * @param b1p returns pointer to first buffer here - * @param b1len returns length of data in b1 - * @param b2p returns pointer to second buffer here (null if none) - * @param b2len returns length of b2 or zero if b2 is null - * @return 0 on success, -1 on error (not open, or no message buffer) - */ - int mlog_dmesg(char **b1p, int *b1len, char **b2p, int *b2len); - - /** - * mlog_mbcount: give hint as to current size of message buffer. - * (buffer size may change if mlog is called after this...) - * - * @return number of bytes in msg buffer (zero if empty/disabled) - */ - int mlog_mbcount(void); - - /** - * mlog_mbcopy: safely copy the most recent bytes of the message buffer - * over into another buffer for use. 
- * - * @param buf buffer to copy to - * @param offset offset in message buffer (0 to start at the end) - * @param len length of the buffer - * @return number of bytes copied (<= len), or -1 on error - */ - int mlog_mbcopy(char *buf, int offset, int len); - - /** - * mlog_exit: like mlog, but exits with the given status after processing - * the log. we always log to STDERR. - * - * @param status the value to exit with - * @param flags facility+level+misc flags - * @param fmt printf-style format string - * @param ... printf-style args - */ - void mlog_exit(int status, int flags, const char *fmt, ...) - __attribute__((__noreturn__, __format__(__printf__, 3, 4))); - - /** - * mlog_findmesgbuf: search for a message buffer inside another buffer - * (typically a mmaped core file). - * - * @param b the buffer to search - * @param len the length of the buffer b - * @param b1p returns pointer to first buffer here - * @param b1l returns length of data in b1 - * @param b2p returns pointer to second buffer here (null if none) - * @param b2l returns length of b2 or zero if b2 is null - * @return 0 on success, -1 on error - */ - int mlog_findmesgbuf(char *b, int len, char **b1p, int *b1l, - char **b2p, int *b2l); - - /** - * mlog_namefacility: assign a name to a facility. since the facility - * number is used as an index into an array, don't choose large numbers. - * - * @param facility the facility to name - * @param aname the new abbreviated name, or null to remove the name - * @param lname optional long name (null if not needed) - * @return 0 on success, -1 on error (malloc problem). - */ - int mlog_namefacility(int facility, char *aname, char *lname); - - /** - * mlog_open: open a multilog (uses malloc). you can only have one - * multilog open at a time, but you can use multiple facilities. - * if an mlog is already open, then this call will fail. if you use - * the message buffer, you will prob want it to be 1K or larger. 
- * - * @param tag string we tag each line with, optionally followed by pid - * @param maxfac_hint hint as to largest user fac value that will be used - * @param default_mask the default mask to use for each facility - * @param stderr_mask messages with a mask above this go to stderr. If - * this is 0, then output goes to stderr only if MLOG_STDERR is used - * (either in mlog_open or in mlog). - * @param logfile log file name, or null if no log file - * @param msgbuf_len size of message buffer, or zero if no message buffer - * @param flags STDERR, UCON_ON, UCON_ENV, SYSLOG, LOGPID - * @param syslogfac facility to use if MLOG_SYSLOG is set in flags - * @return 0 on success, -1 on error. - */ - int mlog_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, - char *logfile, int msgbuf_len, int flags, int syslogfac); - - /** - * mlog_str2pri: convert a priority string to an int pri value to allow - * for more user-friendly programs. - * - * @param pstr the priority string - * @return -1 (an invalid pri) on error. - */ - int mlog_str2pri(const char *pstr); - - /** - * mlog_reopen: reopen a multilog. this will reopen the log file, - * reset the ucon socket (if on), and refresh the pid in the tag (if - * present). call this to rotate log files or after a fork (which - * changes the pid and may also close fds). the logfile param should - * be set to a zero-length string ("") to keep the old value of logfile. - * if the logfile is NULL, then any open logfiles will be switched off. - * if the logfile is a non-zero length string, it is the new logfile name. - * - * @param logfile settings for the logfile after reopen (see above). - * @return 0 on success, -1 on error. - */ - int mlog_reopen(char *logfile); - - /** - * mlog_setlogmask: set the logmask for a given facility. if the user - * uses a new facility, we ensure that our facility array covers it - * (expanding as needed). 
- * - * @param facility the facility we are adjusting (16 bit int) - * @param mask the new mask to apply - * @return the old mask val, or -1 on error. cannot fail if facility array - * was preallocated. - */ - int mlog_setlogmask(int facility, int mask); - - /** - * mlog_setmasks: set mlog masks for a set of facilities to a given level. - * the input string should look like: PREFIX1=LEVEL1,PREFIX2=LEVEL2,... - * where the "PREFIX" is the facility name defined with mlog_namefacility(). - * - * @param mstr settings to use (doesn't have to be null term'd if mstr >= 0) - * @param mlen length of mstr (if < 0, assume null terminated, use strlen) - */ - void mlog_setmasks(char *mstr, int mlen); - - /** - * mlog_getmasks: get current mask level as a string (not null terminated). - * if the buffer is null, we probe for length rather than fill. - * - * @param buf the buffer to put the results in (NULL == probe for length) - * @param discard bytes to discard before starting to fill buf (normally 0) - * @param len length of the buffer - * @param unterm if non-zero do not include a trailing null - * @return bytes returned (may be trunced and non-null terminated if == len) - */ - int mlog_getmasks(char *buf, int discard, int len, int unterm); - - /** - * mlog_ucon_add: add an endpoint as a ucon - * - * @param host hostname (or IP) of remote endpoint - * @param port udp port (in host byte order) - * @return 0 on success, -1 on error - */ - int mlog_ucon_add(char *host, int port); - - /** - * mlog_ucon_on: enable ucon (UDP console) - * - * @return 0 on success, -1 on error - */ - int mlog_ucon_on(void); - - /** - * mlog_ucon_off: disable ucon (UDP console) if enabled - * - * @return 0 on success, -1 on error - */ - int mlog_ucon_off(void); - - /** - * mlog_ucon_rm: remove an ucon endpoint - * - * @param host hostname (or IP) of remote endpoint - * @param port udp port (in host byte order) - * @return 0 on success, -1 on error - */ - int mlog_ucon_rm(char *host, int port); - - 
-#ifndef MLOG_NOMACRO_OPT - /* - * here's some cpp-based optimizations - */ - - /* - * use -DMLOG_NEVERLOG=1 to compile out all the mlog calls (e.g. for - * performance testing, when you want to get rid of all extra overheads). - */ -#ifndef MLOG_NEVERLOG -#define MLOG_NEVERLOG 0 /* default value is to keep mlog */ -#endif - - /* - * turn log into a macro so that we can check the log level before - * evaluating all the mlog() args. no point in computing the args - * and building a call stack if we are not going to do anything. - * - * you can't do this with inline functions, because gcc will not - * inline functions that use "..." so doing something like: - * void inline mlog(int f, char *c, ...) { return; } - * and then - * mlog(MLOG_INFO, "fputs=%d", fputs("fputs was called", stdout)); - * will not inline out the function call setup, so fputs() will - * get called even though mlog doesn't do anything. - * - * vmlog() will refilter, but it also has to handle stderr_mask, so - * it isn't a big deal to have it recheck the level... could add - * a flag to tell vmlog() to skip the filter if it was an issue. - * - * this assumes your cpp supports "..." and __VA_ARGS__ (gcc does). - * - * note that cpp does not expand the mlog() call inside the #define, - * that goes to the real mlog function. - */ - -#define mlog(LEVEL, ...) do { \ - if (MLOG_NEVERLOG == 0 && mlog_filter(LEVEL)) \ - mlog((LEVEL), __VA_ARGS__); \ - } while (0) - -#endif /* MLOG_NOMACRO_OPT */ - - -#if defined(__cplusplus) -} /* __END_DECLS */ -#endif - -#endif /* _MLOG_H_ */ diff --git a/meta/src/Mlog2/mlogfacs2.h b/meta/src/Mlog2/mlogfacs2.h deleted file mode 100644 index e06640043..000000000 --- a/meta/src/Mlog2/mlogfacs2.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * The Self-* Storage System Project - * Copyright (c) 2011, Carnegie Mellon University. - * All rights reserved. 
- * http://www.pdl.cmu.edu/ (Parallel Data Lab at Carnegie Mellon) - * - * This software is being provided by the copyright holders under the - * following license. By obtaining, using and/or copying this software, - * you agree that you have read, understood, and will comply with the - * following terms and conditions: - * - * Permission to reproduce, use, and prepare derivative works of this - * software is granted provided the copyright and "No Warranty" statements - * are included with all reproductions and derivative works and associated - * documentation. This software may also be redistributed without charge - * provided that the copyright and "No Warranty" statements are included - * in all redistributions. - * - * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS. - * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER - * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED - * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY - * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE - * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT - * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT. - * COPYRIGHT HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE - * OR DOCUMENTATION. - */ - -/* - * facility names. - * - * DO NOT EDIT-- this file is automatically generated by makefacs.pl. 
- */ - -#ifndef _MLOGFACS_H_ -#define _MLOGFACS_H_ - -#include "mlog2.h" /* for MLOG_ defines */ - -#if defined(MLOG_FACSARRAY) || defined(MLOG_AFACSARRAY) -static const char *mlog_facsarray[] = { - "MLOG", /* 0 -- MLOG default fac */ - "MDHIM_CLIENT", /* 1 */ - "MDHIM_SERVER", /* 2 */ - "STORE", /* 3 */ - "MPI", /* 4 */ - 0, /* 5 */ -}; -#endif /* MLOG_FACSARRAY || MLOG_AFACSARRAY */ - -#if defined(MLOG_FACSARRAY) || defined(MLOG_LFACSARRAY) -static const char *mlog_lfacsarray[] = { - "MLOG", /* 0 -- MLOG default fac */ - "MDHIM client", /* 1 */ - "MDHIM range server", /* 2 */ - "storage", /* 3 */ - "MPI", /* 4 */ - 0, /* 5 */ -}; -#endif /* MLOG_LFACSARRAY || MLOG_LFACSARRAY */ - -/* - * standard facility defines - */ -#define MLOGFAC_MDHIM_CLIENT 1 /* MDHIM client */ -#define MLOGFAC_MDHIM_SERVER 2 /* MDHIM range server */ -#define MLOGFAC_STORE 3 /* storage */ -#define MLOGFAC_MPI 4 /* MPI */ - -/* - * MDHIM client MLOG levels - */ -#define MDHIM_CLIENT_EMERG (1 | MLOG_EMERG) -#define MDHIM_CLIENT_ALERT (1 | MLOG_ALERT) -#define MDHIM_CLIENT_CRIT (1 | MLOG_CRIT) -#define MDHIM_CLIENT_ERR (1 | MLOG_ERR) -#define MDHIM_CLIENT_WARN (1 | MLOG_WARN) -#define MDHIM_CLIENT_NOTE (1 | MLOG_NOTE) -#define MDHIM_CLIENT_INFO (1 | MLOG_INFO) -#define MDHIM_CLIENT_DBG (1 | MLOG_DBG) -#define MDHIM_CLIENT_DBG0 (1 | MLOG_DBG0) -#define MDHIM_CLIENT_DAPI MDHIM_CLIENT_DBG0 -#define MDHIM_CLIENT_DBG1 (1 | MLOG_DBG1) -#define MDHIM_CLIENT_DINTAPI MDHIM_CLIENT_DBG1 -#define MDHIM_CLIENT_DBG2 (1 | MLOG_DBG2) -#define MDHIM_CLIENT_DCOMMON MDHIM_CLIENT_DBG2 -#define MDHIM_CLIENT_DBG3 (1 | MLOG_DBG3) -#define MDHIM_CLIENT_DRARE MDHIM_CLIENT_DBG3 - -/* - * MDHIM range server MLOG levels - */ -#define MDHIM_SERVER_EMERG (2 | MLOG_EMERG) -#define MDHIM_SERVER_ALERT (2 | MLOG_ALERT) -#define MDHIM_SERVER_CRIT (2 | MLOG_CRIT) -#define MDHIM_SERVER_ERR (2 | MLOG_ERR) -#define MDHIM_SERVER_WARN (2 | MLOG_WARN) -#define MDHIM_SERVER_NOTE (2 | MLOG_NOTE) -#define MDHIM_SERVER_INFO (2 | 
MLOG_INFO) -#define MDHIM_SERVER_DBG (2 | MLOG_DBG) -#define MDHIM_SERVER_DBG0 (2 | MLOG_DBG0) -#define MDHIM_SERVER_DAPI MDHIM_SERVER_DBG0 -#define MDHIM_SERVER_DBG1 (2 | MLOG_DBG1) -#define MDHIM_SERVER_DINTAPI MDHIM_SERVER_DBG1 -#define MDHIM_SERVER_DBG2 (2 | MLOG_DBG2) -#define MDHIM_SERVER_DCOMMON MDHIM_SERVER_DBG2 -#define MDHIM_SERVER_DBG3 (2 | MLOG_DBG3) -#define MDHIM_SERVER_DRARE MDHIM_SERVER_DBG3 - -/* - * storage MLOG levels - */ -#define STORE_EMERG (3 | MLOG_EMERG) -#define STORE_ALERT (3 | MLOG_ALERT) -#define STORE_CRIT (3 | MLOG_CRIT) -#define STORE_ERR (3 | MLOG_ERR) -#define STORE_WARN (3 | MLOG_WARN) -#define STORE_NOTE (3 | MLOG_NOTE) -#define STORE_INFO (3 | MLOG_INFO) -#define STORE_DBG (3 | MLOG_DBG) -#define STORE_DBG0 (3 | MLOG_DBG0) -#define STORE_DAPI STORE_DBG0 -#define STORE_DBG1 (3 | MLOG_DBG1) -#define STORE_DINTAPI STORE_DBG1 -#define STORE_DBG2 (3 | MLOG_DBG2) -#define STORE_DCOMMON STORE_DBG2 -#define STORE_DBG3 (3 | MLOG_DBG3) -#define STORE_DRARE STORE_DBG3 - -/* - * MPI MLOG levels - */ -#define MPI_EMERG (4 | MLOG_EMERG) -#define MPI_ALERT (4 | MLOG_ALERT) -#define MPI_CRIT (4 | MLOG_CRIT) -#define MPI_ERR (4 | MLOG_ERR) -#define MPI_WARN (4 | MLOG_WARN) -#define MPI_NOTE (4 | MLOG_NOTE) -#define MPI_INFO (4 | MLOG_INFO) -#define MPI_DBG (4 | MLOG_DBG) -#define MPI_DBG0 (4 | MLOG_DBG0) -#define MPI_DAPI MPI_DBG0 -#define MPI_DBG1 (4 | MLOG_DBG1) -#define MPI_DINTAPI MPI_DBG1 -#define MPI_DBG2 (4 | MLOG_DBG2) -#define MPI_DCOMMON MPI_DBG2 -#define MPI_DBG3 (4 | MLOG_DBG3) -#define MPI_DRARE MPI_DBG3 - -#endif /* _MLOGFACS_H_ */ diff --git a/meta/src/client.c b/meta/src/client.c deleted file mode 100644 index 20a084f33..000000000 --- a/meta/src/client.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. 
- * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include "mdhim.h" -#include "client.h" -#include "partitioner.h" -#include - -struct timeval msggetstart, msggetend; -double msggettime=0; - -struct timeval msgputstart, msgputend; -double msgputtime=0; -/** - * Send put to range server - * - * @param md main MDHIM struct - * @param pm pointer to put message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *client_put(struct mdhim_t *md, struct mdhim_putm_t *pm) { - - int return_code; - struct mdhim_rm_t *rm; - - return_code = send_rangesrv_work(md, pm->basem.server_rank, pm); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "put record request", md->mdhim_rank, return_code); - return NULL; - } - - return_code = receive_client_response(md, pm->basem.server_rank, (void **) &rm); - // If the receive did not succeed then log the error code and return MDHIM_ERROR - if 
(return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "put record request", md->mdhim_rank, return_code); - rm->error = MDHIM_ERROR; - } - - // Return response message - return rm; -} - -/** - * Send bulk put to range server - * - * @param md main MDHIM struct - * @param bpm_list double pointer to an array of bulk put messages - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_brm_t *client_bput(struct mdhim_t *md, struct index_t *index, - struct mdhim_bputm_t **bpm_list) { - int return_code; - struct mdhim_brm_t *brm_head, *brm_tail, *brm; - struct mdhim_rm_t **rm_list, *rm; - int i; - int *srvs; - int num_srvs; - - num_srvs = 0; - srvs = malloc(sizeof(int) * index->num_rangesrvs); - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bpm_list[i]) { - continue; - } - - srvs[num_srvs] = bpm_list[i]->basem.server_rank; - num_srvs++; - } - - if (!num_srvs) { - free(srvs); - return NULL; - } - - gettimeofday(&msgputstart, NULL); - return_code = send_all_rangesrv_work(md, (void **) bpm_list, index->num_rangesrvs); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "bput record request", md->mdhim_rank, return_code); - - return NULL; - } - gettimeofday(&msgputend, NULL); - msgputtime += 1000000*(msgputend.tv_sec-msgputstart.tv_sec) + msgputend.tv_usec - msgputstart.tv_usec; - - rm_list = malloc(sizeof(struct mdhim_rm_t *) * num_srvs); - memset(rm_list, 0, sizeof(struct mdhim_rm_t *) * num_srvs); - return_code = receive_all_client_responses(md, srvs, num_srvs, (void ***) &rm_list); - // If the receives did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "bput record requests", 
md->mdhim_rank, return_code); - } - - brm_head = brm_tail = NULL; - for (i = 0; i < num_srvs; i++) { - rm = rm_list[i]; - if (!rm) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error: did not receive a response message in client_bput", - md->mdhim_rank); - //Skip this as the message doesn't exist - continue; - } - - brm = malloc(sizeof(struct mdhim_brm_t)); - brm->error = rm->error; - brm->basem.mtype = rm->basem.mtype; - brm->basem.server_rank = rm->basem.server_rank; - free(rm); - - //Build the linked list to return - brm->next = NULL; - if (!brm_head) { - brm_head = brm; - brm_tail = brm; - } else { - brm_tail->next = brm; - brm_tail = brm; - } - } - - free(rm_list); - free(srvs); - - // Return response message - return brm_head; -} - -/** Send bulk get to range server - * - * @param md main MDHIM struct - * @param bgm_list double pointer to an array or bulk get messages - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_bgetrm_t *client_bget(struct mdhim_t *md, struct index_t *index, - struct mdhim_bgetm_t **bgm_list) { - int return_code; - struct mdhim_bgetrm_t *bgrm_head, *bgrm_tail, *bgrm; - struct mdhim_bgetrm_t **bgrm_list; - int i; - int *srvs; - int num_srvs; - - num_srvs = 0; - srvs = malloc(sizeof(int) * index->num_rangesrvs); - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bgm_list[i]) { - continue; - } - - srvs[num_srvs] = bgm_list[i]->basem.server_rank; - num_srvs++; - } - - if (!num_srvs) { - free(srvs); - return NULL; - } - gettimeofday(&msggetstart, NULL); - return_code = send_all_rangesrv_work(md, (void **) bgm_list, index->num_rangesrvs); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "bget record request", md->mdhim_rank, return_code); - - return NULL; - } - gettimeofday(&msggetend, NULL); - msggettime += 
1000000*(msggetend.tv_sec-msggetstart.tv_sec)+\ - msggetend.tv_usec-msggetstart.tv_usec; - - bgrm_list = malloc(sizeof(struct mdhim_bgetrm_t *) * num_srvs); - memset(bgrm_list, 0, sizeof(struct mdhim_bgetrm_t *) * num_srvs); - return_code = receive_all_client_responses(md, srvs, num_srvs, (void ***) &bgrm_list); - // If the receives did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "bget record requests", md->mdhim_rank, return_code); - } - - bgrm_head = bgrm_tail = NULL; - for (i = 0; i < num_srvs; i++) { - bgrm = bgrm_list[i]; - if (!bgrm) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error: did not receive a response message in client_bget", - md->mdhim_rank); - //Skip this as the message doesn't exist - continue; - } - //Build the linked list to return - bgrm->next = NULL; - if (!bgrm_head) { - bgrm_head = bgrm; - bgrm_tail = bgrm; - } else { - bgrm_tail->next = bgrm; - bgrm_tail = bgrm; - } - } - - free(bgrm_list); - free(srvs); - - // Return response message - return bgrm_head; -} - -/** Send get to range server with an op and number of records greater than one - * - * @param md main MDHIM struct - * @param gm pointer to get message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_bgetrm_t *client_bget_op(struct mdhim_t *md, struct mdhim_getm_t *gm) { - - int return_code; - struct mdhim_bgetrm_t *brm; - - return_code = send_rangesrv_work(md, gm->basem.server_rank, gm); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "get record request", md->mdhim_rank, return_code); - return NULL; - } - - return_code = receive_client_response(md, gm->basem.server_rank, (void 
**) &brm); - // If the receive did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "get record request", md->mdhim_rank, return_code); - brm->error = MDHIM_ERROR; - } - - // Return response message - return brm; -} - -/** - * Send delete to range server - * - * @param md main MDHIM struct - * @param dm pointer to del message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *client_delete(struct mdhim_t *md, struct mdhim_delm_t *dm) { - - int return_code; - struct mdhim_rm_t *rm; - - return_code = send_rangesrv_work(md, dm->basem.server_rank, dm); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "delete record request", md->mdhim_rank, return_code); - return NULL; - } - - return_code = receive_client_response(md, dm->basem.server_rank, (void **) &rm); - // If the receive did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "delete record request", md->mdhim_rank, return_code); - rm->error = MDHIM_ERROR; - } - - // Return response - return rm; -} - -/** - * Send bulk delete to range server - * - * @param md main MDHIM struct - * @param bdm_list double pointer to an array of bulk del messages - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_brm_t *client_bdelete(struct mdhim_t *md, struct index_t *index, - struct mdhim_bdelm_t **bdm_list) { - int return_code; - struct mdhim_brm_t *brm_head, *brm_tail, *brm; - struct mdhim_rm_t **rm_list, *rm; - int i; - int *srvs; - int num_srvs; - - num_srvs 
= 0; - srvs = malloc(sizeof(int) * index->num_rangesrvs); - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bdm_list[i]) { - continue; - } - - srvs[num_srvs] = bdm_list[i]->basem.server_rank; - num_srvs++; - } - - return_code = send_all_rangesrv_work(md, (void **) bdm_list, index->num_rangesrvs); - // If the send did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while sending " - "bdel record request", md->mdhim_rank, return_code); - - return NULL; - } - - rm_list = malloc(sizeof(struct mdhim_rm_t *) * num_srvs); - memset(rm_list, 0, sizeof(struct mdhim_rm_t *) * num_srvs); - return_code = receive_all_client_responses(md, srvs, num_srvs, (void ***) &rm_list); - // If the receives did not succeed then log the error code and return MDHIM_ERROR - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: %d from server while receiving " - "bdel record requests", md->mdhim_rank, return_code); - } - - brm_head = brm_tail = NULL; - for (i = 0; i < num_srvs; i++) { - rm = rm_list[i]; - if (!rm) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error: did not receive a response message in client_bdel", - md->mdhim_rank); - //Skip this as the message doesn't exist - continue; - } - - brm = malloc(sizeof(struct mdhim_brm_t)); - brm->error = rm->error; - brm->basem.mtype = rm->basem.mtype; - brm->basem.server_rank = rm->basem.server_rank; - free(rm); - - //Build the linked list to return - brm->next = NULL; - if (!brm_head) { - brm_head = brm; - brm_tail = brm; - } else { - brm_tail->next = brm; - brm_tail = brm; - } - } - - free(rm_list); - free(srvs); - - // Return response message - return brm_head; -} diff --git a/meta/src/client.h b/meta/src/client.h deleted file mode 100644 index a744302eb..000000000 --- a/meta/src/client.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, 
LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#ifndef __CLIENT_H -#define __CLIENT_H - -#include "messages.h" - -struct mdhim_rm_t *client_put(struct mdhim_t *md, struct mdhim_putm_t *pm); -struct mdhim_brm_t *client_bput(struct mdhim_t *md, struct index_t *index, - struct mdhim_bputm_t **bpm_list); -struct mdhim_bgetrm_t *client_bget(struct mdhim_t *md, struct index_t *index, - struct mdhim_bgetm_t **bgm_list); -struct mdhim_bgetrm_t *client_bget_op(struct mdhim_t *md, struct mdhim_getm_t *gm); -struct mdhim_rm_t *client_delete(struct mdhim_t *md, struct mdhim_delm_t *dm); -struct mdhim_brm_t *client_bdelete(struct mdhim_t *md, struct index_t *index, - struct mdhim_bdelm_t **bdm_list); - -#endif diff --git a/meta/src/data_store.c b/meta/src/data_store.c deleted file mode 100644 index 71fb760d5..000000000 --- a/meta/src/data_store.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
- * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#include -#include -#include "mdhim_options.h" -#include "data_store.h" -#ifdef LEVELDB_SUPPORT -#include "ds_leveldb.h" -#endif -#ifdef ROCKSDB_SUPPORT -#include "ds_leveldb.h" -#endif -#ifdef SOPHIADB_SUPPORT -#include "ds_sophia.h" -#endif -#ifdef MYSQLDB_SUPPORT -#include "ds_mysql.h" -#endif - - -/** - * mdhim_db_init - * Initializes mdhim_store_t structure based on type - * - * @param type in Database store type to use (i.e., LEVELDB, etc) - * @return mdhim_store_t The mdhim storage abstraction struct - */ -struct mdhim_store_t *mdhim_db_init(int type) { - struct mdhim_store_t *store; - - //Initialize the store structure - store = malloc(sizeof(struct mdhim_store_t)); - store->type = type; - store->db_handle = NULL; - store->db_stats = NULL; - store->mdhim_store_stats = NULL; - store->mdhim_store_stats_lock = malloc(sizeof(pthread_rwlock_t)); - if (pthread_rwlock_init(store->mdhim_store_stats_lock, NULL) != 0) { - free(store->mdhim_store_stats_lock); - return NULL; - } - - switch(type) { - -#ifdef LEVELDB_SUPPORT - case LEVELDB: - store->open = mdhim_leveldb_open; - store->put = mdhim_leveldb_put; - store->batch_put = mdhim_leveldb_batch_put; - store->get = mdhim_leveldb_get; - store->get_next = mdhim_leveldb_get_next; - store->get_prev = mdhim_leveldb_get_prev; - store->del = mdhim_leveldb_del; - store->commit = mdhim_leveldb_commit; - store->close = mdhim_leveldb_close; - break; - -#endif - -#ifdef ROCKSDB_SUPPORT - case ROCKSDB: - store->open = mdhim_leveldb_open; - store->put = mdhim_leveldb_put; - store->batch_put = mdhim_leveldb_batch_put; - store->get = mdhim_leveldb_get; - store->get_next = mdhim_leveldb_get_next; - store->get_prev = mdhim_leveldb_get_prev; - store->del = mdhim_leveldb_del; - store->commit = mdhim_leveldb_commit; - store->close = mdhim_leveldb_close; - break; -#endif - -#ifdef MYSQLDB_SUPPORT - case MYSQLDB: - store->open = mdhim_mysql_open; - store->put = mdhim_mysql_put; - store->batch_put = mdhim_mysql_batch_put; - 
store->get = mdhim_mysql_get; - store->get_next = mdhim_mysql_get_next; - store->get_prev = mdhim_mysql_get_prev; - store->del = mdhim_mysql_del; - store->commit = mdhim_mysql_commit; - store->close = mdhim_mysql_close; - break; -#endif - - - default: - free(store); - store = NULL; - break; - } - - return store; -} diff --git a/meta/src/data_store.h b/meta/src/data_store.h deleted file mode 100644 index 284b143c7..000000000 --- a/meta/src/data_store.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __STORE_H -#define __STORE_H - -#include "uthash.h" -#include "mdhim_options.h" - -/* Storage Methods */ -#define LEVELDB 1 //LEVELDB storage method -#define MYSQLDB 3 -#define ROCKSDB 4 //RocksDB -/* mdhim_store_t flags */ -#define MDHIM_CREATE 1 //Implies read/write -#define MDHIM_RDONLY 2 -#define MDHIM_RDWR 3 - -/* Keys for stats database */ -#define MDHIM_MAX_STAT 1 -#define MDHIM_MIN_STAT 2 -#define MDHIM_NUM_STAT 3 - -struct mdhim_store_t; -/* Function pointers for abstracting data stores */ -typedef int (*mdhim_store_open_fn_t)(void **db_handle, void **db_stats, char *path, int flags, - int key_type, struct mdhim_options_t *opts); -typedef int (*mdhim_store_put_fn_t)(void *db_handle, void *key, int32_t key_len, - void *data, int32_t data_len); -typedef int (*mdhim_store_batch_put_fn_t)(void *db_handle, void **keys, int32_t *key_lens, - void **data, int32_t *data_lens, int num_records); -typedef int (*mdhim_store_get_fn_t)(void *db_handle, void *key, int key_len, void **data, int32_t *data_len); -typedef int (*mdhim_store_get_next_fn_t)(void *db_handle, void **key, - int *key_len, void **data, - int32_t *data_len); -typedef int (*mdhim_store_get_prev_fn_t)(void *db_handle, void **key, - int *key_len, void **data, - int32_t *data_len); -typedef int (*mdhim_store_del_fn_t)(void *db_handle, void *key, int key_len); -typedef int (*mdhim_store_commit_fn_t)(void *db_handle); -typedef int (*mdhim_store_close_fn_t)(void *db_handle, void *db_stats); - -//Used for storing stats in a hash table -struct mdhim_stat; -struct mdhim_stat { - int key; //Key (slice number) - void *max; //Max key - void *min; //Min key - int dirty; //Wether this stat was updated or a new stat - uint64_t num; //Number of keys in this slice - struct mdhim_stat *stats; //Used for local index stats to create a multi-level hash table - UT_hash_handle hh; /* makes this structure hashable */ -}; - - -//Used for storing stats in the database -struct mdhim_db_stat { - int slice; - 
uint64_t imax; - uint64_t imin; - long double dmax; - long double dmin; - uint64_t num; -}; - -//Used for transmitting integer stats to all nodes -struct mdhim_db_istat { - int slice; - uint64_t num; - uint64_t imax; - uint64_t imin; -}; - -//Used for transmitting float stats to all nodes -struct mdhim_db_fstat { - int slice; - uint64_t num; - long double dmax; - long double dmin; -}; - -/* Generic mdhim storage object */ -struct mdhim_store_t { - int type; - //handle to db - void *db_handle; - //Handle to db for stats - void *db_stats; - //Pointers to functions based on data store - mdhim_store_open_fn_t open; - mdhim_store_put_fn_t put; - mdhim_store_batch_put_fn_t batch_put; - mdhim_store_get_fn_t get; - mdhim_store_get_next_fn_t get_next; - mdhim_store_get_prev_fn_t get_prev; - mdhim_store_del_fn_t del; - mdhim_store_commit_fn_t commit; - mdhim_store_close_fn_t close; - - //Login credentials - char *db_user; - char *db_upswd; - char *dbs_user; - char *dbs_upswd; - char *db_host; - char *dbs_host; - - //Hashtable for stats - struct mdhim_stat *mdhim_store_stats; - - //Lock to allow concurrent readers and a single writer to the mdhim_store_stats - pthread_rwlock_t *mdhim_store_stats_lock; -}; - -//Initializes the data store based on the type given (i.e., LEVELDB, etc...) -struct mdhim_store_t *mdhim_db_init(int db_type); -#endif diff --git a/meta/src/ds_leveldb.c b/meta/src/ds_leveldb.c deleted file mode 100644 index 602b38ca3..000000000 --- a/meta/src/ds_leveldb.c +++ /dev/null @@ -1,1337 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
- * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include "ds_leveldb.h" - -struct timeval dbputstart, dbputend; -struct timeval dbgetstart, dbgetend; -double dbputtime=0, dbgettime=0; - -struct timeval dbngetstart, dbngetend; -double dbngettime=0; - -struct timeval dbbputstart, dbbputend; -double dbbputtime=0; - -extern int dbg_rank; -static void cmp_destroy(void* arg) { } - -static int cmp_empty(const char* a, size_t alen, - const char* b, size_t blen) { - int ret = 2; - if (a && !b) { - return 1; - } else if (!a && b) { - return -1; - } else if (!a && !b) { - return 0; - } - - if (alen > blen) { - return 1; - } else if (blen > alen) { - return -1; - } - - return ret; -} - -int cmp_int_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - ret = cmp_empty(a, alen, b, blen); - if (ret != 2) { - return ret; - } - if (*(uint32_t *) a < *(uint32_t *) b) { - ret = -1; - } else if (*(uint32_t *) a == *(uint32_t *) b) { - ret = 0; - } else { - ret = 1; - } - - return ret; -} - -int cmp_lint_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - ret = cmp_empty(a, alen, b, blen); - if (ret != 2) { - return ret; - } - if (*(uint64_t *) a < *(uint64_t *) b) { - ret = -1; - } else if (*(uint64_t *) a == *(uint64_t *) b) { - ret = 0; - } else { - ret = 1; 
- } - - return ret; -} - -static int cmp_double_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - ret = cmp_empty(a, alen, b, blen); - if (ret != 2) { - return ret; - } - if (*(double *) a < *(double *) b) { - ret = -1; - } else if (*(double *) a == *(double *) b) { - ret = 0; - } else { - ret = 1; - } - - return ret; -} - -static int cmp_float_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - ret = cmp_empty(a, alen, b, blen); - if (ret != 2) { - return ret; - } - if (*(float *) a < *(float *) b) { - ret = -1; - } else if (*(float *) a == *(float *) b) { - ret = 0; - } else { - ret = 1; - } - - return ret; -} - - -// For string, first compare for null pointers, then for order -// up to a null character or the given lengths. -static int cmp_string_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int idx; - - if (a && !b) { - return 1; - } else if (!a && b) { - return -1; - } else if (!a && !b) { - return 0; - } - - // Do this wile they are equal and we have not reached the end of one of them - for(idx=0; *a == *b && *a != '\0' && *b != '\0' && idx *b ) { // else compare the two different characters to decide - return 1; - } - - // If none of the above, then b is greater - return -1; -} - -static int cmp_byte_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - long offset, old_offset; - long fid, old_fid; - - fid = *((unsigned long *)a); - old_fid = *((unsigned long *)b); - - offset = *((unsigned long *)a+1); - old_offset = *((unsigned long *)b+1); - - ret = fid - old_fid; - - if (ret != 0) - return ret; - else { - if (offset - old_offset > 0) - return 1; - else if(offset -old_offset < 0) - return -1; - else - return 0; - } - -// ret = memcmp(a, b, alen); - return ret; -} - -static int cmp_unifyfs_compare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int ret; - - long offset, 
old_offset; - long fid, old_fid; - - fid = *((unsigned long *)a); - old_fid = *((unsigned long *)b); - - offset = *((unsigned long *)a+1); - old_offset = *((unsigned long *)b+1); - - ret = fid - old_fid; - - if (ret != 0) - return ret; - else { - if (offset - old_offset > 0) - return 1; - else if(offset - old_offset < 0) - return -1; - else - return 0; - } - -// ret = memcmp(a, b, alen); - return ret; -} - -static const char* cmp_name(void* arg) { - return "mdhim_cmp"; -} - -/** - * mdhim_leveldb_open - * Opens the database - * - * @param dbh in double pointer to the leveldb handle - * @param dbs in double pointer to the leveldb statistics db handle - * @param path in path to the database file - * @param flags in flags for opening the data store - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ - -int mdhim_leveldb_open(void **dbh, void **dbs, char *path, int flags, int key_type, struct mdhim_options_t *opts) { - struct mdhim_leveldb_t *mdhimdb; - struct mdhim_leveldb_t *statsdb; - leveldb_t *db; - char *err = NULL; - char stats_path[PATH_MAX]; - - mdhimdb = malloc(sizeof(struct mdhim_leveldb_t)); - memset(mdhimdb, 0, sizeof(struct mdhim_leveldb_t)); - statsdb = malloc(sizeof(struct mdhim_leveldb_t)); - memset(statsdb, 0, sizeof(struct mdhim_leveldb_t)); - - //Create the options for the main database - mdhimdb->options = leveldb_options_create(); - leveldb_options_set_create_if_missing(mdhimdb->options, 1); - //leveldb_options_set_compression(options, 0); - mdhimdb->filter = leveldb_filterpolicy_create_bloom(256); - mdhimdb->cache = leveldb_cache_create_lru(8388608); - mdhimdb->env = leveldb_create_default_env(); - mdhimdb->write_options = leveldb_writeoptions_create(); - leveldb_writeoptions_set_sync(mdhimdb->write_options, 0); - mdhimdb->read_options = leveldb_readoptions_create(); - leveldb_options_set_cache(mdhimdb->options, mdhimdb->cache); - 
leveldb_options_set_filter_policy(mdhimdb->options, mdhimdb->filter); - //leveldb_options_set_max_open_files(mdhimdb->options, 10000); - leveldb_options_set_max_open_files(mdhimdb->options, 10000); - leveldb_options_set_write_buffer_size(mdhimdb->options, 1048576); - leveldb_options_set_env(mdhimdb->options, mdhimdb->env); - //Create the options for the stat database - statsdb->options = leveldb_options_create(); - leveldb_options_set_create_if_missing(statsdb->options, 1); - //leveldb_options_set_compression(stat_options, 0); - statsdb->filter = leveldb_filterpolicy_create_bloom(16); - statsdb->cache = leveldb_cache_create_lru(1024); - statsdb->env = leveldb_create_default_env(); - statsdb->write_options = leveldb_writeoptions_create(); - leveldb_writeoptions_set_sync(statsdb->write_options, 0); - statsdb->read_options = leveldb_readoptions_create(); - leveldb_options_set_cache(statsdb->options, statsdb->cache); - leveldb_options_set_filter_policy(statsdb->options, statsdb->filter); - leveldb_options_set_write_buffer_size(statsdb->options, 1024); - leveldb_options_set_env(statsdb->options, statsdb->env); - - switch(key_type) { - case MDHIM_INT_KEY: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_int_compare, cmp_name); - mdhimdb->compare = cmp_int_compare; - break; - case MDHIM_LONG_INT_KEY: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_lint_compare, cmp_name); - mdhimdb->compare = cmp_lint_compare; - break; - case MDHIM_FLOAT_KEY: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_float_compare, cmp_name); - mdhimdb->compare = cmp_float_compare; - break; - case MDHIM_DOUBLE_KEY: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_double_compare, cmp_name); - mdhimdb->compare = cmp_double_compare; - break; - case MDHIM_STRING_KEY: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_string_compare, cmp_name); - mdhimdb->compare = cmp_string_compare; - break; - case MDHIM_UNIFYFS_KEY: - 
mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_unifyfs_compare, cmp_name); - mdhimdb->compare = cmp_unifyfs_compare; - default: - mdhimdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_byte_compare, cmp_name); - mdhimdb->compare = cmp_byte_compare; - break; - } - - leveldb_options_set_comparator(mdhimdb->options, mdhimdb->cmp); - //Check to see if the given path + "_stat" and the null char will be more than the max - if (strlen(path) + 6 > PATH_MAX) { - mlog(MDHIM_SERVER_CRIT, "Error opening leveldb database - path provided is too long"); - return MDHIM_DB_ERROR; - } - - //Open the main database - db = leveldb_open(mdhimdb->options, path, &err); - - fflush(stdout); - mdhimdb->db = db; - //Set the output handle - *((struct mdhim_leveldb_t **) dbh) = mdhimdb; - if (err != NULL) { - mlog(MDHIM_SERVER_CRIT, "Error opening leveldb database, abc..., path is %s", path); - return MDHIM_DB_ERROR; - } - - //Open the stats database - sprintf(stats_path, "%s_stats", path); - statsdb->compare = cmp_int_compare; - statsdb->cmp = leveldb_comparator_create(NULL, cmp_destroy, cmp_int_compare, cmp_name); - leveldb_options_set_comparator(statsdb->options, statsdb->cmp); - db = leveldb_open(statsdb->options, stats_path, &err); - - statsdb->db = db; - *((struct mdhim_leveldb_t **) dbs) = statsdb; - - if (err != NULL) { - mlog(MDHIM_SERVER_CRIT, "Error opening leveldb database, def..., stats_path is %s", stats_path); - return MDHIM_DB_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * mdhim_leveldb_put - * Stores a single key in the data store - * - * @param dbh in pointer to the leveldb handle - * @param key in void * to the key to store - * @param key_len in length of the key - * @param data in void * to the value of the key - * @param data_len in length of the value data - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_put(void *dbh, void *key, 
int key_len, void *data, int32_t data_len) { - leveldb_writeoptions_t *options; - char *err = NULL; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - struct timeval start, end; - - gettimeofday(&start, NULL); - options = mdhimdb->write_options; - leveldb_put(mdhimdb->db, options, key, key_len, data, data_len, &err); - gettimeofday(&end, NULL); - /* - * temporarily mute the error message until the file metadata - * operation is fully defined and implemented */ - - if (err != NULL) { - /* printf("in mdhim, rank:%d, key is %d, value len is %d, err is %s\n",\ - dbg_rank, *((int *)key), data_len, err); - fflush(stdout); */ - mlog(MDHIM_SERVER_CRIT, "Error putting key/value in leveldb"); - return MDHIM_DB_ERROR; - } - - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to put the record", - (int) (end.tv_sec - start.tv_sec)); - - return MDHIM_SUCCESS; -} - -/** - * mdhim_leveldb_batch_put - * Stores multiple keys in the data store - * - * @param dbh in pointer to the leveldb handle - * @param keys in void ** to the key to store - * @param key_lens in int * to the lengths of the keys - * @param data in void ** to the values of the keys - * @param data_lens in int * to the lengths of the value data - * @param num_records in int for the number of records to insert - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_batch_put(void *dbh, void **keys, int32_t *key_lens, - void **data, int32_t *data_lens, int num_records) { - gettimeofday(&dbbputstart, NULL); - leveldb_writeoptions_t *options; - char *err = NULL; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - struct timeval start, end; - leveldb_writebatch_t* write_batch; - int i; - - gettimeofday(&start, NULL); - write_batch = leveldb_writebatch_create(); - options = mdhimdb->write_options; - for (i = 0; i < num_records; i++) { -/* printf("in ds, fid is %d, offset is %ld, nodeid is 
%ld, len %ld, key_len:%d, data_lens:%d, num_records:%ld\n", *((long *)(keys[i])),\ - *((long *)keys[i]+1), *(((long *)(data[i]))),\ - *((long *)((data[i]))+1), key_lens[i], data_lens[i], num_records); - fflush(stdout); -*/ - leveldb_writebatch_put(write_batch, keys[i], key_lens[i], - data[i], data_lens[i]); - } - - leveldb_write(mdhimdb->db, options, write_batch, &err); - leveldb_writebatch_destroy(write_batch); - if (err != NULL) { - mlog(MDHIM_SERVER_CRIT, "Error in batch put in leveldb"); - return MDHIM_DB_ERROR; - } - - gettimeofday(&end, NULL); - gettimeofday(&end, NULL); - - gettimeofday(&dbbputend, NULL); - dbbputtime+=1000000*(dbbputend.tv_sec-dbbputstart.tv_sec)+dbbputend.tv_usec-dbbputstart.tv_usec; - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to put %d records", - (int) (end.tv_sec - start.tv_sec), num_records); - - return MDHIM_SUCCESS; -} - -/** - * mdhim_leveldb_get - * Gets a value, given a key, from the data store - * - * @param dbh in pointer to the leveldb db handle - * @param key in void * to the key to retrieve the value of - * @param key_len in length of the key - * @param data out void * to the value of the key - * @param data_len out pointer to length of the value data - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_get(void *dbh, void *key, int key_len, void **data, int32_t *data_len) { -/* - printf("in getting\n"); - fflush(stdout); -*/ - leveldb_readoptions_t *options; - char *err = NULL; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - int ret = MDHIM_SUCCESS; - void *ldb_data; - size_t ldb_data_len = 0; - - options = mdhimdb->read_options; - *data = NULL; - gettimeofday(&dbgetstart, NULL); - ldb_data = leveldb_get(mdhimdb->db, options, key, key_len, &ldb_data_len, &err); - if (err != NULL) { - mlog(MDHIM_SERVER_CRIT, "Error getting value in leveldb"); - return MDHIM_DB_ERROR; - } - - if (!ldb_data_len) { 
- ret = MDHIM_DB_ERROR; - return ret; - } - - *data_len = ldb_data_len; - *data = malloc(*data_len); - memcpy(*data, ldb_data, *data_len); - free(ldb_data); - gettimeofday(&dbgetend, NULL); - dbgettime+=1000000*(dbgetend.tv_sec-dbgetstart.tv_sec) + \ - dbgetend.tv_usec-dbgetstart.tv_usec; - return ret; -} - -/** - * mdhim_leveldb_get_next - * Gets the next key/value from the data store - * - * @param dbh in pointer to the unqlite db handle - * @param key out void ** to the key that we get - * @param key_len out int * to the length of the key - * @param data out void ** to the value belonging to the key - * @param data_len out int * to the length of the value data - * @param mstore_opts in additional cursor options for the data store layer - * - */ -int mdhim_leveldb_get_next(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len) { - leveldb_readoptions_t *options; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - int ret = MDHIM_SUCCESS; - leveldb_iterator_t *iter; - const char *res; - int len = 0; - void *old_key; - int old_key_len; - struct timeval start, end; - int cmp_ret = -5; - - gettimeofday(&dbngetstart, NULL); - //Init the data to return - *data = NULL; - *data_len = 0; - - gettimeofday(&start, NULL); - //Create the options and iterator - options = mdhimdb->read_options; - old_key = *key; - old_key_len = *key_len; - *key = NULL; - *key_len = 0; - - - iter = leveldb_create_iterator(mdhimdb->db, options); - - //If the user didn't supply a key, then seek to the first - if (!old_key || old_key_len == 0) { - - leveldb_iter_seek_to_first(iter); - } else { - - /* Seek to the passed in key. 
If that doesn't exist, iterate until we find one greater - or until we exhaust the keys.*/ - leveldb_iter_seek(iter, old_key, old_key_len); - - if (!leveldb_iter_valid(iter)) { - - leveldb_iter_seek_to_first(iter); - - while(leveldb_iter_valid(iter)) { - - res = leveldb_iter_key(iter, (size_t *) &len); - - if ((cmp_ret = mdhimdb->compare(NULL, res, len,\ - old_key, old_key_len)) > 0) { - break; - } - - leveldb_iter_next(iter); - } - } else { - - if (mdhimdb->compare(NULL, (leveldb_iter_key(iter,\ - (size_t *) &len)), len, old_key, old_key_len) == 0) - leveldb_iter_next(iter); - } - } - - if (!leveldb_iter_valid(iter)) { - /* - printf("error 1\n"); - fflush(stdout); - */ - goto error; - } - - res = leveldb_iter_value(iter, (size_t *) &len); - if (res) { - *data = malloc(len); - memcpy(*data, res, len); - *data_len = len; - } else { - *data = NULL; - *data_len = 0; - } - - res = leveldb_iter_key(iter, (size_t *) key_len); - if (res) { - *key = malloc(*key_len); - memcpy(*key, res, *key_len); - } else { - *key = NULL; - *key_len = 0; - } - - if (!*data) { - goto error; - /* - printf("error 2\n"); - fflush(stdout); - */ - } - - //Destroy iterator - leveldb_iter_destroy(iter); - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to get the next record", - (int) (end.tv_sec - start.tv_sec)); - gettimeofday(&dbngetend, NULL); - dbngettime += 1000000*(dbngetend.tv_sec-dbngetstart.tv_sec)+dbngetend.tv_usec-dbngetstart.tv_usec; - return ret; - -error: - gettimeofday(&dbngetend, NULL); - dbngettime += 1000000*(dbngetend.tv_sec-dbngetstart.tv_sec)+dbngetend.tv_usec-dbngetstart.tv_usec; - //Destroy iterator - leveldb_iter_destroy(iter); - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - return MDHIM_DB_ERROR; -} - - -/** - * mdhim_leveldb_get_prev - * Gets the prev key/value from the data store - * - * @param dbh in pointer to the unqlite db handle - * @param key out void ** to the key that we get - * @param key_len out int * to the length of 
the key - * @param data out void ** to the value belonging to the key - * @param data_len out int * to the length of the value data - * @param mstore_opts in additional cursor options for the data store layer - * - */ -int mdhim_leveldb_get_prev(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len) { - leveldb_readoptions_t *options; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - int ret = MDHIM_SUCCESS; - leveldb_iterator_t *iter; - const char *res; - int len = 0; - void *old_key; - int old_key_len; - struct timeval start, end; - - //Init the data to return - *data = NULL; - *data_len = 0; - - gettimeofday(&start, NULL); - - //Create the options and iterator - options = mdhimdb->read_options; - old_key = *key; - old_key_len = *key_len; - *key = NULL; - *key_len = 0; - - iter = leveldb_create_iterator(mdhimdb->db, options); - - //If the user didn't supply a key, then seek to the first - if (!old_key || old_key_len == 0) { - leveldb_iter_seek_to_last(iter); - } else { - leveldb_iter_seek(iter, old_key, old_key_len); - if (!leveldb_iter_valid(iter)) { - leveldb_iter_seek_to_last(iter); - while(leveldb_iter_valid(iter)) { - res = leveldb_iter_key(iter, (size_t *) &len); - if (mdhimdb->compare(NULL, res, len, old_key, old_key_len) < 0) { - break; - } - - leveldb_iter_prev(iter); - } - } else { - leveldb_iter_prev(iter); - } - } - - if (!leveldb_iter_valid(iter)) { - goto error; - } - - res = leveldb_iter_value(iter, (size_t *) &len); - if (res) { - *data = malloc(len); - memcpy(*data, res, len); - *data_len = len; - } else { - *data = NULL; - *data_len = 0; - } - - res = leveldb_iter_key(iter, (size_t *) key_len); - if (res) { - *key = malloc(*key_len); - memcpy(*key, res, *key_len); - } else { - *key = NULL; - *key_len = 0; - } - - if (!*data) { - goto error; - } - - //Destroy iterator - leveldb_iter_destroy(iter); - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to get the previous record", - (int) 
(end.tv_sec - start.tv_sec)); - return ret; - -error: - //Destroy iterator - leveldb_iter_destroy(iter); - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - return MDHIM_DB_ERROR; -} - -/** - * mdhim_leveldb_close - * Closes the data store - * - * @param dbh in pointer to the leveldb db handle - * @param dbs in pointer to the leveldb statistics db handle - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_close(void *dbh, void *dbs) { - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - struct mdhim_leveldb_t *statsdb = (struct mdhim_leveldb_t *) dbs; - - //Close the databases - leveldb_close(mdhimdb->db); - leveldb_close(statsdb->db); - - //Destroy the options - leveldb_comparator_destroy(mdhimdb->cmp); - leveldb_options_destroy(mdhimdb->options); - leveldb_readoptions_destroy(mdhimdb->read_options); - leveldb_writeoptions_destroy(mdhimdb->write_options); - leveldb_filterpolicy_destroy(mdhimdb->filter); - leveldb_comparator_destroy(statsdb->cmp); - leveldb_options_destroy(statsdb->options); - leveldb_readoptions_destroy(statsdb->read_options); - leveldb_writeoptions_destroy(statsdb->write_options); - leveldb_filterpolicy_destroy(statsdb->filter); - - free(mdhimdb); - free(statsdb); - - return MDHIM_SUCCESS; -} - -/** - * mdhim_leveldb_del - * delete the given key - * - * @param dbh in pointer to the leveldb db handle - * @param key in void * for the key to delete - * @param key_len in int for the length of the key - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_del(void *dbh, void *key, int key_len) { - leveldb_writeoptions_t *options; - char *err = NULL; - struct mdhim_leveldb_t *mdhimdb = (struct mdhim_leveldb_t *) dbh; - - options = mdhimdb->write_options; - leveldb_delete(mdhimdb->db, options, key, 
key_len, &err); - if (err != NULL) { - mlog(MDHIM_SERVER_CRIT, "Error deleting key in leveldb"); - return MDHIM_DB_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * mdhim_leveldb_commit - * Commits outstanding writes the data store - * - * @param dbh in pointer to the leveldb handle - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_leveldb_commit(void *dbh) { - return MDHIM_SUCCESS; -} - - -/** - * mdhim_leveldb_batch_next - * get next (tot_records) starting from key (inclusive) - * - * @param dbh in pointer to the leveldb db handle - * @param key in a list of keys to be returned - * @param key_len in a list of key_length to be returned - * @param data in a list values to be returned corresponding to the keys - * @param data_len in a list of value length to be returned - * @param num_records in actual number of key-value pairs returned - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - * @return - */ -int mdhim_leveldb_batch_next(void *dbh, char **key, int *key_len, - char **data, int32_t *data_len, - int tot_records, int *num_records) { - - gettimeofday(&dbngetstart, NULL); - struct mdhim_leveldb_t *mdhim_db = (struct mdhim_leveldb_t *) dbh; - int cursor = 0; - leveldb_readoptions_t *options; - leveldb_iterator_t *iter; - const char *res; - int len = 0; - void *old_key; - int old_key_len; - - options = mdhim_db->read_options; - old_key = key[0]; - old_key_len = key_len[0]; - - iter = leveldb_create_iterator(mdhim_db->db, options); - if (!old_key || old_key_len == 0) { - leveldb_iter_seek_to_first(iter); - - } else { - leveldb_iter_seek(iter, (char *)old_key, old_key_len); - - while(leveldb_iter_valid(iter) && cursor != tot_records) { - - res = leveldb_iter_value(iter, (size_t *)&len); - if (res) { - data[cursor] = (char *)malloc(len); - memcpy(data[cursor], res, len); - data_len[cursor] = len; - - } else { - data[cursor] = NULL; - data_len[cursor] = 0; - goto error; - } - - res = leveldb_iter_key(iter, (size_t 
*)&len); - if (res) { - key[cursor] = (char *)malloc(len); - memcpy(key[cursor], res, len); - key_len[cursor] = len; - - - } else { - key[cursor] = NULL; - key_len[cursor] = 0; - goto error; - } - - leveldb_iter_next(iter); - (*num_records)++; - cursor++; - } - } - gettimeofday(&dbngetend, NULL); - dbngettime +=\ - 1000000 * (dbngetend.tv_sec - dbngetstart.tv_sec)\ - + dbngetend.tv_usec - dbngetstart.tv_usec; - leveldb_iter_destroy(iter); - if (*num_records < tot_records) - return MDHIM_DB_ERROR; - else - return 0; -error: - gettimeofday(&dbngetend, NULL); - dbngettime += 1000000 * (dbngetend.tv_sec - dbngetstart.tv_sec)\ - + dbngetend.tv_usec - dbngetstart.tv_usec; - //Destroy iterator - leveldb_iter_destroy(iter); - return MDHIM_DB_ERROR; - -} - -/** - * leveldb_batch_ranges - * get a list of key-value pairs that fall in the range of a list of - * items identified (start_key, end_key) - * - * @param dbh in pointer to the leveldb db handle - * @param key in a list of start_key and end_key pairs - * @param key_len in a list of key_length for start_keys and end_keys - * @param out_keys in pointer to a list keys to be returned - * @param out_keys_len in pointer to a list of key_lengths to be returned - * @param out_val in pointer to a list of values to be returned - * @param out_val_len in pointer to a list of value lens to be returned - * @param num_ranges in number of start/end key ranges - * @param out_records_cnt in number of copied key-value pairs - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - * @return - */ -int leveldb_batch_ranges(void *dbh, char **key, int32_t *key_len, - char ***out_keys, int32_t **out_keys_len, - char ***out_vals, int32_t **out_vals_len, - int num_ranges, int *out_records_cnt) { - - int i, start_ndx, end_ndx; - struct mdhim_leveldb_t *mdhim_db = (struct mdhim_leveldb_t *) dbh; - - int tmp_records_cnt = 0; /*the temporary number of out records*/ - int tmp_out_cap = num_ranges; /* the temporary out capacity*/ - - 
leveldb_iterator_t *iter; - leveldb_readoptions_t *options; - options = mdhim_db->read_options; - - iter = leveldb_create_iterator(mdhim_db->db, options); - - *out_keys = (char **) calloc(num_ranges, sizeof(char *)); - *out_keys_len = (int32_t *) calloc(num_ranges, sizeof(int32_t)); - - *out_vals = (char **) calloc(num_ranges, sizeof(char *)); - *out_vals_len = (int32_t *) calloc(num_ranges, sizeof(int32_t)); - - /*ToDo: return different error types if leveldb_process_range fails*/ - - for (i = 0; i < num_ranges; i++) { - start_ndx = 2 * i; - end_ndx = start_ndx + 1; - /* printf("range %d: fid is %d, start_offset=%zu end_offset=%zu\n", - * i, UNIFYFS_KEY_FID(key[start_ndx]), - * UNIFYFS_KEY_OFF(key[start_ndx]), - * UNIFYFS_KEY_OFF(key[end_ndx])); - */ - leveldb_process_range(iter, key[start_ndx], key[end_ndx], - key_len[start_ndx], - out_keys, out_keys_len, - out_vals, out_vals_len, - &tmp_records_cnt, &tmp_out_cap); - } - - *out_records_cnt = tmp_records_cnt; - - /* printf("out_records_cnt is %d\n", *out_records_cnt); - * for (i = 0; i < *out_records_cnt; i++) { - * printf("out %d: fid is %d, offset=%zu addr=%zu\n", - * i, UNIFYFS_KEY_FID((*out_keys)[i]), - * UNIFYFS_KEY_OFF((*out_keys)[i]), - * UNIFYFS_VAL_ADDR((*out_vals)[i])); - * } - * fflush(stdout); - */ - - leveldb_iter_destroy(iter); - return 0; -} - -/* - * for comments inside: - * start: start_key offset - * end: end_key offset - * prev_s: start offset of the K-V pair that precedes start - * prev_e: end offset of the K-V pair that precedes start - * (prev_e = prev_s + value length - 1) - * next_s: start offset of the K-V pair that follows start - * next_e: end offset of the K-V pair that follows start - * (next_e = next_s + value length - 1) - * */ -int leveldb_process_range(leveldb_iterator_t *iter, - char *start_key, char *end_key, int32_t key_len, - char ***out_keys, int32_t **out_keys_len, - char ***out_vals, int32_t **out_vals_len, - int *tmp_records_cnt, int *tmp_out_cap) { - - const char *ret_key, 
*ret_val; - size_t tmp_key_len, tmp_val_len; - const char *next_ret_key; - - int prev_flag = 0; - - leveldb_iter_seek(iter, (char *)start_key, (size_t)key_len); - if (!leveldb_iter_valid(iter)) { - // check last K-V - leveldb_iter_seek_to_last(iter); - if (!leveldb_iter_valid(iter)) - return 0; - - ret_key = leveldb_iter_key(iter, &tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - else if (UNIFYFS_KEY_FID(ret_key) != UNIFYFS_KEY_FID(start_key)) - return 0; - - // last key matched fid, but not offset - prev_flag = 1; - } else { - ret_key = leveldb_iter_key(iter, &tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - - if (UNIFYFS_KEY_FID(start_key) != UNIFYFS_KEY_FID(ret_key)) { - // mismatch on fid, check previous K-V - leveldb_iter_prev(iter); - if (!leveldb_iter_valid(iter)) { - return 0; - } - - ret_key = leveldb_iter_key(iter, &tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - else if (UNIFYFS_KEY_FID(start_key) != UNIFYFS_KEY_FID(ret_key)) - return 0; - - prev_flag = 1; - } - } - - ret_val = leveldb_iter_value(iter, &tmp_val_len); - if (!ret_val) - return MDHIM_DB_ERROR; - - unsigned long start_off = UNIFYFS_KEY_OFF(start_key); - unsigned long end_off = UNIFYFS_KEY_OFF(end_key); - - if (prev_flag) { - // ret_key is previous K-V with matching fid - unsigned long prev_st = UNIFYFS_KEY_OFF(ret_key); - unsigned long prev_end = prev_st + UNIFYFS_VAL_LEN(ret_val) - 1; - if (start_off > prev_end) { - /* prev_s......prev_e; ...... 
start..end */ - return 0; - } - - unsigned long tmp_end; - if (end_off > prev_end) { - /* prev_s......prev_e; next_s......next_e - start...............end */ - tmp_end = prev_end; - } else { - /* prev_s......prev_e; next_s......next_e - start..end */ - tmp_end = end_off; - } - - assert((UNIFYFS_KEY_SZ == tmp_key_len) && - (UNIFYFS_VAL_SZ == tmp_val_len)); - char *ret_out_key = calloc(1, UNIFYFS_KEY_SZ); - char *ret_out_val = calloc(1, UNIFYFS_VAL_SZ); - - memcpy(ret_out_key, ret_key, UNIFYFS_KEY_SZ); - UNIFYFS_KEY_OFF(ret_out_key) = start_off; - - memcpy(ret_out_val, ret_val, UNIFYFS_VAL_SZ); - UNIFYFS_VAL_ADDR(ret_out_val) = UNIFYFS_VAL_ADDR(ret_val) - + (start_off - prev_st); - UNIFYFS_VAL_LEN(ret_out_val) = tmp_end - start_off + 1; - - add_kv(out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap, - ret_out_key, ret_out_val, - tmp_key_len, tmp_val_len); - - return 0; - - } else if (UNIFYFS_KEY_OFF(ret_key) == start_off) { - // exact match on start offset - return handle_next_half(iter, start_key, end_key, - out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap); - } - - leveldb_iter_prev(iter); - if (!leveldb_iter_valid(iter)) { - // already the first K-V, handle the rest of range - leveldb_iter_seek_to_first(iter); - return handle_next_half(iter, start_key, end_key, - out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap); - } - - next_ret_key = ret_key; - ret_key = leveldb_iter_key(iter, &tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - else if (UNIFYFS_KEY_FID(ret_key) != UNIFYFS_KEY_FID(start_key)) { - leveldb_iter_next(iter); - return handle_next_half(iter, start_key, end_key, - out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap); - } - - ret_val = leveldb_iter_value(iter, &tmp_val_len); - if (!ret_val) - return MDHIM_DB_ERROR; - - unsigned long prev_st = UNIFYFS_KEY_OFF(ret_key); - unsigned long prev_end = prev_st + UNIFYFS_VAL_LEN(ret_val) 
- 1; - - if (start_off <= prev_end) { - int found_end = 0; - unsigned long tmp_end = prev_end; - if (end_off <= prev_end) { - /* prev_s......prev_e; next_s......next_e - * start....end - */ - found_end = 1; - tmp_end = end_off; - } - /* else prev_end < end_off - * prev_s......prev_e; next_s......next_e - * start..................end - */ - - assert((UNIFYFS_KEY_SZ == tmp_key_len) && - (UNIFYFS_VAL_SZ == tmp_val_len)); - char *ret_out_key = (char *) calloc(1, UNIFYFS_KEY_SZ); - char *ret_out_val = (char *) calloc(1, UNIFYFS_VAL_SZ); - - memcpy(ret_out_key, ret_key, UNIFYFS_KEY_SZ); - UNIFYFS_KEY_OFF(ret_out_key) = start_off; - - memcpy(ret_out_val, ret_val, UNIFYFS_VAL_SZ); - UNIFYFS_VAL_LEN(ret_out_val) = tmp_end - start_off + 1; - UNIFYFS_VAL_ADDR(ret_out_val) = UNIFYFS_VAL_ADDR(ret_val) + - (start_off - prev_st); - - add_kv(out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap, - ret_out_key, ret_out_val, - tmp_key_len, tmp_val_len); - - if (found_end) { - return 0; - } - - // start at next to find rest of range - UNIFYFS_KEY_OFF(start_key) = UNIFYFS_KEY_OFF(next_ret_key); - leveldb_iter_next(iter); - } else { - /* start between prev and next, one of two cases: - * (1) prev_s......prev_e; next_s......next_e - * start............end - * - * (2) prev_s......prev_e; next_s......next_e - * start..........................end - */ - // look for start of range in next - leveldb_iter_next(iter); - } - - return handle_next_half(iter, start_key, end_key, - out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap); -} - -int handle_next_half(leveldb_iterator_t *iter, - char *start_key, char *end_key, - char ***out_keys, int32_t **out_keys_len, - char ***out_vals, int32_t **out_vals_len, - int *tmp_records_cnt, int *tmp_out_cap) { - const char *ret_key, *ret_val; - size_t tmp_key_len, tmp_val_len; - - ret_key = leveldb_iter_key(iter, &tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - - ret_val = 
leveldb_iter_value(iter, &tmp_val_len); - if (!ret_val) - return MDHIM_DB_ERROR; - - assert((UNIFYFS_KEY_SZ == tmp_key_len) && - (UNIFYFS_VAL_SZ == tmp_val_len)); - - unsigned long curr_off = UNIFYFS_KEY_OFF(ret_key); - unsigned long curr_end = curr_off + UNIFYFS_VAL_LEN(ret_val) - 1; - - unsigned long end_off = UNIFYFS_KEY_OFF(end_key); - - if (curr_off > end_off) { - // start..end precedes current K-V offset - return 0; - } - - char *ret_out_key; - char *ret_out_val; - - ret_out_key = (char *) calloc(1, UNIFYFS_KEY_SZ); - ret_out_val = (char *) calloc(1, UNIFYFS_VAL_SZ); - memcpy(ret_out_key, ret_key, UNIFYFS_KEY_SZ); - memcpy(ret_out_val, ret_val, UNIFYFS_VAL_SZ); - - if (end_off <= curr_end) { - // found end in current K-V, add slice - /* curr_s.........curr_e - [start]....end */ - - UNIFYFS_VAL_LEN(ret_out_val) = end_off - curr_off + 1; - - add_kv(out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap, - ret_out_key, ret_out_val, - tmp_key_len, tmp_val_len); - return 0; - } - - // range fully covers current K-V, add it - add_kv(out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap, - ret_out_key, ret_out_val, - tmp_key_len, tmp_val_len); - - // add subsequent K-Vs until end found (or fid mismatch) - int found_end = 0; - while (1) { - leveldb_iter_next(iter); - if (!leveldb_iter_valid(iter)) { - // end is past last K-V - break; - } - - ret_key = leveldb_iter_key(iter, (size_t *)&tmp_key_len); - if (!ret_key) - return MDHIM_DB_ERROR; - else if (UNIFYFS_KEY_FID(ret_key) != UNIFYFS_KEY_FID(start_key)) { - // fid mismatch - break; - } - - ret_val = leveldb_iter_value(iter, (size_t *)&tmp_val_len); - if (!ret_val) - return MDHIM_DB_ERROR; - - curr_off = UNIFYFS_KEY_OFF(ret_key); - curr_end = curr_off + UNIFYFS_VAL_LEN(ret_val) - 1; - - if (curr_off > end_off) { - // current K-V starts after end - break; - } - - assert((UNIFYFS_KEY_SZ == tmp_key_len) && - (UNIFYFS_VAL_SZ == tmp_val_len)); - ret_out_key = (char *) 
calloc(1, UNIFYFS_KEY_SZ); - ret_out_val = (char *) calloc(1, UNIFYFS_VAL_SZ); - memcpy(ret_out_key, ret_key, UNIFYFS_KEY_SZ); - memcpy(ret_out_val, ret_val, UNIFYFS_VAL_SZ); - - if (curr_end >= end_off) { - // found end in current K-V, add slice - found_end = 1; - UNIFYFS_VAL_LEN(ret_out_val) = end_off - curr_off + 1; - } - // else, range fully covers current K-V, add it - add_kv(out_keys, out_keys_len, - out_vals, out_vals_len, - tmp_records_cnt, tmp_out_cap, - ret_out_key, ret_out_val, - tmp_key_len, tmp_val_len); - - if (found_end) - break; - } - return 0; -} - -int add_kv(char ***out_keys, int32_t **out_keys_len, - char ***out_vals, int32_t **out_vals_len, - int *tmp_records_cnt, int *tmp_out_cap, - char *ret_key, char *ret_val, - size_t key_len, size_t val_len) { - int curr_cnt = *tmp_records_cnt; - if (curr_cnt == *tmp_out_cap) { - int new_cap = curr_cnt * 2; - *out_keys = (char **) realloc(*out_keys, - new_cap * sizeof(char *)); - *out_vals = (char **) realloc(*out_vals, - new_cap * sizeof(char *)); - *out_keys_len = (int32_t *) realloc(*out_keys_len, - new_cap * sizeof(int32_t)); - *out_vals_len = (int32_t *) realloc(*out_vals_len, - new_cap * sizeof(int32_t)); - *tmp_out_cap = new_cap; - } - - (*out_keys)[curr_cnt] = ret_key; - (*out_vals)[curr_cnt] = ret_val; - (*out_keys_len)[curr_cnt] = (int32_t)key_len; - (*out_vals_len)[curr_cnt] = (int32_t)val_len; - - *tmp_records_cnt = curr_cnt + 1; - return 0; -} diff --git a/meta/src/ds_leveldb.h b/meta/src/ds_leveldb.h deleted file mode 100644 index befef0c23..000000000 --- a/meta/src/ds_leveldb.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. 
- * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#ifndef __LEVELDB_H -#define __LEVELDB_H - -#ifndef LEVELDB_SUPPORT -#include -#else -#include -#endif - -#include "mdhim.h" -#include "partitioner.h" -#include "data_store.h" - -#include "unifyfs_metadata_mdhim.h" - -/* Function pointer for comparator in C */ -typedef int (*mdhim_store_cmp_fn_t)(void* arg, const char* a, size_t alen, - const char* b, size_t blen); - -struct mdhim_leveldb_t { - leveldb_t *db; - leveldb_options_t *options; - leveldb_comparator_t* cmp; - leveldb_filterpolicy_t *filter; - leveldb_cache_t *cache; - leveldb_env_t *env; - leveldb_writeoptions_t *write_options; - leveldb_readoptions_t *read_options; - mdhim_store_cmp_fn_t compare; -}; - -int mdhim_leveldb_open(void **dbh, void **dbs, char *path, - int flags, int key_type, - struct mdhim_options_t *opts); -int mdhim_leveldb_put(void *dbh, void *key, int key_len, - void *data, int32_t data_len); -int mdhim_leveldb_get(void *dbh, void *key, int key_len, - void **data, int32_t *data_len); -int mdhim_leveldb_get_next(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len); -int mdhim_leveldb_get_prev(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len); -int 
mdhim_leveldb_close(void *dbh, void *dbs); -int mdhim_leveldb_del(void *dbh, void *key, int key_len); -int mdhim_leveldb_commit(void *dbh); -int mdhim_leveldb_batch_put(void *dbh, void **key, int32_t *key_lens, - void **data, int32_t *data_lens, int num_records); -int mdhim_leveldb_batch_next(void *dbh, char **key, - int *key_len, char **data, int32_t *data_len, - int tot_records, int *num_records); -int leveldb_batch_ranges(void *dbh, char **key, int32_t *key_len, - char ***out_key, int32_t **out_key_len, - char ***out_val, int32_t **out_val_len, - int num_ranges, int *out_records_cnt); -int leveldb_process_range(leveldb_iterator_t *iter, - char *start_key, char *end_key, int32_t key_len, - char ***out_key, int32_t **out_key_len, - char ***out_val, int32_t **out_val_len, - int *tmp_records_cnt, int *tmp_out_cap); -int handle_next_half(leveldb_iterator_t *iter, - char *start_key, char *end_key, - char ***out_key, int **out_key_len, - char ***out_val, int **out_val_len, - int *tmp_records_cnt, int *tmp_out_cap); -int add_kv(char ***out_key, int32_t **out_key_len, - char ***out_val, int32_t **out_val_len, - int *tmp_records_cnt, int *tmp_out_cap, - char *ret_key, char *ret_val, - size_t key_len, size_t val_len); - -#endif diff --git a/meta/src/ds_mysql.c b/meta/src/ds_mysql.c deleted file mode 100644 index de9cde98a..000000000 --- a/meta/src/ds_mysql.c +++ /dev/null @@ -1,1022 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. 
Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include "ds_mysql.h" -#include -#include -#define MYSQL_BUFFER 1024 -#define MYSQLDB_HANDLE 1 -#define MYSQLDB_STAT_HANDLE 2 - -char *sb_key_copy(MYSQL *d, char *key_t, int key_len_t) { - if (key_len_t ==0) key_len_t = strlen(key_t); - char *k_copy = malloc(2+key_len_t+1); - memset(k_copy, 0, key_len_t+1); - mysql_real_escape_string(d, k_copy, key_t, key_len_t); - - return k_copy; - } - - -/* - *Put general functionf - * - *@param pdb in MYSQL pointer Database handle for connection and doing the escape stirnt - *@param key_t in void pointer Key for value - *@param k_len in int Key lenght - *@param data_t in void pointer Data to be inserted - *@param d_len in int Data Length - *@param t_name in char pointer Table name to execute insert - -*/ - -char *put_value(MYSQL *pdb, void *key_t, int k_len, void *data_t, int d_len, char *t_name, int pk_type){ - -char chunk[2*d_len+1]; -char kchunk[2*k_len+1]; - mysql_real_escape_string(pdb, chunk, data_t, d_len); - mysql_real_escape_string(pdb, kchunk, key_t, k_len); - // mysql_real_escape_string(db, key_t_insert,key_t,k_len); - //char *key_copy; - size_t st_len, size=0; - char *r_query=NULL, *st; - switch(pk_type){ - case MDHIM_BYTE_KEY: - case MDHIM_STRING_KEY: - //mysql_real_escape_string(pdb, k_chunk, key_t, k_len); - st = "Insert INTO %s (Id, Value) VALUES ('%s', '%s');"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + 
strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - //key_copy = malloc(2*k_len+1); - //memset(key_copy, 0, 2*k_len+1); - //memcpy(key_copy, kchunk, 2*k_len+1); - snprintf(r_query, st_len + size, st, t_name, kchunk, chunk); - //free(key_copy); - break; - case MDHIM_FLOAT_KEY: - st = "Insert INTO %s (Id, Value) VALUES (%f, '%s');"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, *((float*)key_t), chunk); - break; - case MDHIM_DOUBLE_KEY: - st = "Insert INTO %s (Id, Value) VALUES (%lf, '%s');"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, *((double*)key_t), chunk); - break; - case MDHIM_INT_KEY: - st = "Insert INTO %s (Id, Value) VALUES (%d, '%s');"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, *((int*)key_t), chunk); - break; - case MDHIM_LONG_INT_KEY: - st = "Insert INTO %s (Id, Value) VALUES (%ld, '%s');"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, *((long*)key_t), chunk); - break; - } - return r_query; - -} - - -/* update_value - *Update the value if it didn't insert on put. 
- - *@param pdb in MYSQL pointer Database handle for connection and doing the escape stirnt - *@param key_t in void pointer Key for value - *@param k_len in int Key lenght - *@param data_t in void pointer Data to be inserted - *@param d_len in int Data Length - *@param t_name in char pointer Table name to execute insert - -*/ -char *update_value(MYSQL *pdb, void *key_t, int k_len, void *data_t, int d_len, char *t_name, int pk_type){ - -char chunk[2*d_len+1]; -char kchunk[2*k_len+1]; - mysql_real_escape_string(pdb, chunk, data_t, d_len); - mysql_real_escape_string(pdb, kchunk, key_t, k_len); - size_t st_len, size=0; - char *r_query=NULL, *st; - switch(pk_type){ - case MDHIM_BYTE_KEY: - case MDHIM_STRING_KEY: - st = "Update %s set Value = '%s' where Id = '%s';"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, chunk, kchunk); - break; - case MDHIM_FLOAT_KEY: - st = "Update %s set Value = '%s' where Id = %f;"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, chunk, *((float*)key_t) ); - break; - case MDHIM_DOUBLE_KEY: - st = "Update %s set Value = '%s' where Id = %lf;"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, chunk, *((double*)key_t) ); - break; - case MDHIM_INT_KEY: - st = "Update %s set Value = '%s' where Id = %d;"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name,chunk, *((int*)key_t)); - break; - case MDHIM_LONG_INT_KEY: - st = "Update %s set Value 
= '%s' where Id = %ld;"; - st_len = strlen(st); - size = 2*d_len+1 + 2*k_len+1 + strlen(t_name)+st_len;//strlen(chunk)+strlen(key_t_insert)+1; - r_query=malloc(sizeof(char)*(size)); - snprintf(r_query, st_len + size, st, t_name, chunk, *((long*)key_t)); - break; - - } - - return r_query; - -} - - -/* create db - -* Helps to create the database for mysql if it does not exist -* @param dbmn Database name that will be used in the function -* @param db Database connection used to create the database -* Exit(1) is used to exit out if you don't have a connection - -*/ - -void create_db(char *dbmn, MYSQL *db){ - char q_create[MYSQL_BUFFER]; - memset(q_create, 0, MYSQL_BUFFER); - //int q =0; while (q==0) sleep(5); - snprintf(q_create, sizeof(char)*MYSQL_BUFFER, "USE %s", dbmn); - if (mysql_query(db, q_create)) - { - memset(q_create, 0, MYSQL_BUFFER); - snprintf(q_create, sizeof(char)*MYSQL_BUFFER,"CREATE DATABASE %s", dbmn); - //printf("\nCREATE DATABASE %s\n",dbmn); - if(mysql_query(db,q_create)) { - fprintf(stderr, "%s\n", mysql_error(db)); - mysql_close(db); - exit(1); - } - memset(q_create, 0, strlen(q_create)); - snprintf(q_create, sizeof(char)*MYSQL_BUFFER,"USE %s", dbmn); - if(mysql_query(db,q_create)) { - fprintf(stderr, "%s\n", mysql_error(db)); - mysql_close(db); - exit(1); - } //else printf("DATABASE IN USE"); - } - -} - -/* create_table fot table name - * @param dbmt Database table name to create the table/check to see if table is there - * @param db_name Database name for accessing the databaase - * @param k_type Key type that is used to create the table with the proper type for the key/value storage - **/ -void create_table(char *dbmt, MYSQL *db, char* db_name, int k_type){ - char name[MYSQL_BUFFER]; - //snprintf(name, "SHOW TABLES LIKE \'%s\'", dbmt); - //Create table and if it's there stop - if(mysql_query(db, "SHOW TABLES LIKE 'mdhim'")){ - fprintf(stderr, "%s\n", mysql_error(db)); - } - int check_er = mysql_errno(db); - //printf("\nThis is the error 
number: %d\n", check_er); - if (check_er == 1050){ - if(mysql_query(db, "Drop table mdhim")){ - fprintf(stderr, "%s\n", mysql_error(db)); - } - } - MYSQL_RES *table_res=mysql_store_result(db); - int row_count = table_res->row_count; - if(row_count == 0) - { - memset(name, 0, strlen(name)); - switch(k_type){ - case MDHIM_FLOAT_KEY: - snprintf(name, sizeof(char)*MYSQL_BUFFER, "CREATE TABLE %s( Id FLOAT PRIMARY KEY, Value LONGBLOB)", db_name); - break; - case MDHIM_DOUBLE_KEY: - snprintf(name, sizeof(char)*MYSQL_BUFFER, "CREATE TABLE %s( Id DOUBLE PRIMARY KEY, Value LONGBLOB)", db_name); - break; - case MDHIM_INT_KEY: - snprintf(name, sizeof(char)*MYSQL_BUFFER, "CREATE TABLE %s( Id BIGINT PRIMARY KEY, Value LONGBLOB)", db_name); - break; - case MDHIM_LONG_INT_KEY: - snprintf(name, sizeof(char)*MYSQL_BUFFER, "CREATE TABLE %s( Id BIGINT PRIMARY KEY, Value LONGBLOB)", db_name); - break; - case MDHIM_STRING_KEY: - case MDHIM_BYTE_KEY: - snprintf(name, sizeof(char)*MYSQL_BUFFER, "CREATE TABLE %s( Id VARCHAR(767) PRIMARY KEY, Value LONGBLOB)", db_name); - break; - } - - if(mysql_query(db, name)){ - fprintf(stderr, "%s\n", mysql_error(db)); - } - } - -} -/* str_to_key Helps convert the value in mysql_row into the proper key type and readability - * @param key_row Mysql_ROW type that has the key value - * @param key_type Value that determins what type the mysql_row info should be convereted into thus garanteting the proper key type - * @param size Gets size of the key type - **/ - -void * str_to_key(MYSQL_ROW key_row, int key_type, int * size){ - void * ret=NULL; - switch(key_type){ - case MDHIM_STRING_KEY: - *size= strlen(key_row[0]); - ret = malloc(*size); - ret=key_row[0]; - break; - case MDHIM_FLOAT_KEY: - *size= sizeof(float); - ret = malloc(*size); - *(float*)ret=strtol(key_row[0],NULL,10); - break; - case MDHIM_DOUBLE_KEY: - *size= sizeof(double); - char * endptr; - ret = malloc(*size); - *(double*)ret=strtod(key_row[0],&endptr); - break; - case MDHIM_INT_KEY: - *size= 
sizeof(int); - ret = malloc(*size); - *(int*)ret=strtol(key_row[0],NULL,10); - break; - case MDHIM_LONG_INT_KEY: - *size= sizeof(long); - ret = malloc(*size); - *(long*)ret=strtol(key_row[0],NULL,10); - break; - case MDHIM_BYTE_KEY: - *size= strlen(key_row[0]); - ret = malloc(*size); - ret=key_row[0]; - break; - } - - return ret; - -} - -/** - * mdhim_mysql_open - * Opens the database - * - * @param dbh in double pointer to the mysql handle - * @param dbs in double pointer to the mysql statistics db handle - * @param path in path to the database file - * @param flags in flags for opening the data store - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ - - - -int mdhim_mysql_open(void **dbh, void **dbs, char *path, int flags,int key_type, struct mdhim_options_t *db_opts) { - struct MDI *Input_DB; - struct MDI *Stats_DB; - Input_DB = malloc(sizeof(struct MDI)); - Stats_DB = malloc(sizeof(struct MDI)); - MYSQL *db = mysql_init(NULL); - MYSQL *sdb = mysql_init(NULL); - if (db == NULL){ - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - if (sdb == NULL){ - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - /*char *path_s = malloc(sizeof(path)); - path_s = strcpy(path_s, path); */ - Input_DB ->host = db_opts->db_host; - Stats_DB->host = db_opts->dbs_host; - Input_DB ->user = db_opts->db_user; - Stats_DB ->user = db_opts->dbs_user; - Input_DB ->pswd = db_opts->db_upswd; - Stats_DB->pswd = db_opts->dbs_upswd; - //Abstracting the host, usernames, and password - Input_DB->database= "maindb"; //mstore_opts -> db_ptr4; //Abstracting Database - Input_DB->table = "mdhim"; - Stats_DB ->database = "statsdb";//mstore_opts -> db_ptr4; //Abstracting Statsics Database - Stats_DB->table = "mdhim"; - Input_DB->msqkt = db_opts->db_key_type; - Stats_DB->msqkt = db_opts->db_key_type; - - //connect to the Database - if (mysql_real_connect(db, 
Input_DB->host, Input_DB->user, Input_DB->pswd, - NULL, 0, NULL, 0) == NULL){ - fprintf(stderr, "%s\n", mysql_error(db)); - mysql_close(db); - return MDHIM_DB_ERROR; - } - if (mysql_real_connect(sdb, Stats_DB->host, Stats_DB->user, Stats_DB->pswd, - NULL, 0, NULL, 0) == NULL){ - fprintf(stderr, "%s\n", mysql_error(db)); - mysql_close(sdb); - return MDHIM_DB_ERROR; - } - if (mysql_library_init(0, NULL, NULL)) { - fprintf(stderr, "could not initialize MySQL library\n"); - return MDHIM_DB_ERROR; - } - - create_db(Input_DB->database, db); - create_table(Input_DB->database, db,Input_DB->table, Input_DB->msqkt); - create_db(Stats_DB->database, sdb); - create_table(Stats_DB->database, sdb,Stats_DB->table, Stats_DB->msqkt); - //Abstracting the host, usernames, and password - - - Input_DB->msqdb = db; - Input_DB->msqht = MYSQLDB_HANDLE; - Stats_DB->msqdb = sdb; - Stats_DB->msqht = MYSQLDB_STAT_HANDLE; - *dbh = Input_DB; - *dbs = Stats_DB; - - - - return MDHIM_SUCCESS; - -} -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * mdhim_mysql_put - * Stores a single key in the data store - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param key in void * to the key to store - * @param key_len in length of the key - * @param data in void * to the value of the key - * @param data_len in length of the value data - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_mysql_put(void *dbh, void *key, int key_len, void *data, int32_t data_len) { - - struct timeval start, end; - //printf("In put function\n"); - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = x->msqdb; - if (db == NULL) - { - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - char *table_name; - table_name = x->table; - - gettimeofday(&start, NULL); - char *query; - //Insert key and value into table - query = 
put_value(db, key, key_len, data, data_len, table_name, x->msqkt); - //printf("\nThis is the query: \n%s\n", query); - - if (mysql_real_query(db, query, sizeof(char)*strlen(query))) { - int check_er = mysql_errno(db); - //printf("\nThis is the error number: %d\n", check_er); - if (check_er == 1062){ - memset(query, 0, sizeof(char)*strlen(query)); - query = update_value(db, key, key_len, data, data_len, table_name, x->msqkt); - //printf("\nThis is the query: \n%s\n", query); - - - if (mysql_real_query(db, query, sizeof(char)*strlen(query))) { - //printf("This is the query: %s\n", query); - mlog(MDHIM_SERVER_CRIT, "Error updating key/value in mysql\n"); - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - //else printf("Sucessfully updated key/value in mdhim\n"); - } - else { - mlog(MDHIM_SERVER_CRIT, "Error putting key/value in mysql\nHere is the command:%s\n",query); - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - } - //Report timing - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to put the record", - (int) (end.tv_sec - start.tv_sec)); - return MDHIM_SUCCESS; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * mdhim_mysql_batch_put - * Stores a multiple keys in the data store - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param keys in void ** to the key to store - * @param key_lens in int * to the lengths of the keys - * @param data in void ** to the values of the keys - * @param data_lens in int * to the lengths of the value data - * @param num_records in int for the number of records to insert - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure */ - -int mdhim_mysql_batch_put(void *dbh, void **keys, int32_t *key_lens, - void **data, int32_t *data_lens, int num_records) { - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = (MYSQL *) x->msqdb; - if (db == NULL) - { - fprintf(stderr, 
"%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - int i; - struct timeval start, end; - gettimeofday(&start, NULL); - char *table_name = x->table; - char *query=NULL; - //Insert X amount of Keys and Values - printf("Number records: %d\n", num_records); - for (i = 0; i < num_records; i++) { - query = put_value(db, keys[i], key_lens[i], data[i], data_lens[i], table_name, x->msqkt); - //printf("\nThis is the query: \n%s\n", query); - - if (mysql_real_query(db, query, sizeof(char)*strlen(query))) { - int check_er = mysql_errno(db); - //printf("\nThis is the error number: %d\n", check_er); - if (check_er == 1062){ - memset(query, 0, sizeof(char)*strlen(query)); - query = update_value(db, keys[i], key_lens[i], data[i], data_lens[i], table_name, x->msqkt); - //printf("\nThis is the query: \n%s\n", query); - - if (mysql_real_query(db, query, sizeof(char)*strlen(query))) { - //printf("This is the query: %s\n", query); - mlog(MDHIM_SERVER_CRIT, "Error updating key/value in mysql\n"); - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - //else printf("Sucessfully updated key/value in mdhim\n"); - } - else { - mlog(MDHIM_SERVER_CRIT, "Error putting key/value in mysql\n"); - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - } - - memset(query, 0, sizeof(query)); - } - - //Report timing - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to put %d records", - (int) (end.tv_sec - start.tv_sec), num_records); - return MDHIM_SUCCESS; -} - -///////////////////////////////////////////////////////////////////////////////////////// - - -/** - * mdhim_mysql_get - * Gets a value, given a key, from the data store - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param key in void * to the key to retrieve the value of - * @param key_len in length of the key - * @param data out void * to the value of the key - * @param data_len out pointer to length of the value data - * @param mstore_opts 
in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_mysql_get(void *dbh, void *key, int key_len, void **data, int32_t *data_len){ - MYSQL_RES *data_res; - int ret = MDHIM_SUCCESS; - char get_value[MYSQL_BUFFER+key_len]; - MYSQL_ROW row; - void *msl_data; - char *table_name; - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = x->msqdb; - char *key_copy; - table_name = x->table; - - if (db == NULL) - { - fprintf(stderr, "%s\n", mysql_error(db)); - goto error; - } - - *data = NULL; - if (x->msqkt == MDHIM_STRING_KEY || - x->msqkt == MDHIM_BYTE_KEY) { - key_copy = sb_key_copy(db, key, key_len); - } - -//Create statement to go through and get the value based upon the key - - switch(x->msqkt){ - case MDHIM_STRING_KEY: - snprintf(get_value, sizeof(char)*(MYSQL_BUFFER+ key_len), "Select Value FROM %s WHERE Id = '%s'",table_name, key_copy); - free(key_copy); - break; - case MDHIM_FLOAT_KEY: - snprintf(get_value, sizeof(char)*MYSQL_BUFFER, "Select Value FROM %s WHERE Id = %f",table_name, *((float*)key)); - break; - case MDHIM_DOUBLE_KEY: - snprintf(get_value, sizeof(char)*MYSQL_BUFFER, "Select Value FROM %s WHERE Id = %lf",table_name, *((double*)key)); - break; - case MDHIM_INT_KEY: - snprintf(get_value, sizeof(char)*MYSQL_BUFFER, "Select Value FROM %s WHERE Id = %d",table_name, *((int*)key)); - break; - case MDHIM_LONG_INT_KEY: - snprintf(get_value, sizeof(char)*MYSQL_BUFFER, "Select Value FROM %s WHERE Id = %ld",table_name, *((long*)key)); - break; - case MDHIM_BYTE_KEY: - snprintf(get_value, sizeof(char)*(MYSQL_BUFFER+ key_len), "Select Value FROM %s WHERE Id = '%s'",table_name, key_copy); - free(key_copy); - break; - } -//Query and get results if no resuls get an error or else get the value - //printf("\nThis is the query: \n%s\n", get_value); - if (mysql_query(db,get_value)) { - if(x->msqht !=MYSQLDB_STAT_HANDLE) { - mlog(MDHIM_SERVER_CRIT, "Error getting value in mysql"); - printf("This 
is the error, get_value failed.\n"); - goto error; - } - } - data_res = mysql_store_result(db); - if (data_res->row_count == 0){ - mlog(MDHIM_SERVER_CRIT, "No row data selected"); - printf("This is the error, store row has nothing.\nHEre is query:%s\n", get_value); - goto error; - } - - row = mysql_fetch_row(data_res); - unsigned long *rl = mysql_fetch_lengths(data_res); - *data_len = *rl; - *data = malloc(*data_len+1); - msl_data = row[0]; - //printf("\nThis is the row : \n%s\n", (char*)row[0]); - if (!memcpy(*data, msl_data, *data_len)) { - mlog(MDHIM_SERVER_CRIT, "Error failed memory copy"); - printf("This is the error, get_value failed\n"); - goto error; - -} - mysql_free_result(data_res); - return ret; - -error: - *data=NULL; - *data_len = 0; - return MDHIM_DB_ERROR; -} - -///////////////////////////////////////////////////////////////////////////////////////// - -/** - * mdhim_mysql_del - * delete the given key - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param key in void * for the key to delete - * @param key_len in int for the length of the key - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_mysql_del(void *dbh, void *key, int key_len) { - - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = x->msqdb; - if (db == NULL) - { - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - char key_delete[MYSQL_BUFFER+key_len]; - //Delete the Key - char *table_name = x->table; - char *key_copy; - - - if (x->msqkt == MDHIM_STRING_KEY || - x->msqkt == MDHIM_BYTE_KEY) { - key_copy = sb_key_copy(db, key, key_len); - } - - switch(x->msqkt){ - case MDHIM_STRING_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len),"Delete FROM %s WHERE Id = '%s'",table_name, (char*)key_copy); - break; - case MDHIM_FLOAT_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len),"Delete FROM %s WHERE Id = 
%f",table_name, *((float*)key)); - break; - case MDHIM_DOUBLE_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len),"Delete FROM %s WHERE Id = %lf",table_name, *((double*)key)); - break; - case MDHIM_INT_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len),"Delete FROM %s WHERE Id = %d",table_name, *((int*)key)); - break; - case MDHIM_LONG_INT_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len),"Delete FROM %s WHERE Id = %ld",table_name, *((long*)key)); - break; - case MDHIM_BYTE_KEY: - snprintf(key_delete, sizeof(char)*(MYSQL_BUFFER+key_len), "Delete FROM %s WHERE Id = '%s'",table_name, (char*)key_copy); - break; - } - if (mysql_query(db,key_delete)) { - mlog(MDHIM_SERVER_CRIT, "Error deleting key in mysql"); - return MDHIM_DB_ERROR; - } - //Reset error variable - return MDHIM_SUCCESS; -} - - -/** - * mdhim_mysql_close - * Closes the data store - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param dbs in pointer to the statistics mysql struct which points to the statistics handle - * @param mstore_opts in additional options for the data store layer - * - * @return MDHIM_SUCCESS on success or MDHIM_DB_ERROR on failure - */ -int mdhim_mysql_close(void *dbh, void *dbs) { - - struct MDI *x = (struct MDI *)(dbh); - struct MDI *y = (struct MDI *)(dbs); - MYSQL *db = x->msqdb; - MYSQL *sdb = y->msqdb; -// if(mysql_query(db, "Drop table maindb.mdhim")){ -// mlog(MDHIM_SERVER_CRIT, "Error deleting key in mysql"); -// } -// if (mysql_query(sdb, "Drop table statsdb.mdhim")){ -// mlog(MDHIM_SERVER_CRIT, "Error deleting key in mysql"); -// } - mysql_close(db); - mysql_close(sdb); - free(x); - free(y); - return MDHIM_SUCCESS; -} - -///////////////////////////////////////////////////////////////////////////////////////// - - -/** - * mdhim_mysql_get_next - * Gets the next key/value from the data store - * - * @param dbh in pointer to the mysql struct which points to the handle - * @param key out void ** to the 
key that we get - * @param key_len out int * to the length of the key - * @param data out void ** to the value belonging to the key - * @param data_len out int * to the length of the value data - * @param mstore_opts in additional cursor options for the data store layer - * - */ -int mdhim_mysql_get_next(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len) { - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = x->msqdb; - if (db == NULL) - { - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - int ret = MDHIM_SUCCESS; - void *old_key, *msl_key, *msl_data; - struct timeval start, end; - int key_lg; - if (!key_len){ key_lg = 0; *key=NULL;} - else key_lg = *key_len; - - char get_next[MYSQL_BUFFER+key_lg]; - MYSQL_RES *key_result; - MYSQL_ROW key_row; - char *table_name; - table_name = x->table; - - gettimeofday(&start, NULL); - old_key = *key; - char *key_copy; - if (old_key) key_copy = sb_key_copy(db, (char*)old_key, *key_len); - if (key_len) { - *key = NULL; - *key_len = 0; - } - else{ - *key = NULL; - key_len = &key_lg; - } - *data = NULL; - *data_len = 0; - - //Get the Key from the tables and if there was no old key, use the first one. 
- if (!old_key){ - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s)", table_name, table_name); - if(mysql_query(db, get_next)) { - mlog(MDHIM_SERVER_DBG2, "Could not get the next key/value"); - goto error; - } - - } else { - switch(x->msqkt){ - case MDHIM_STRING_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select min(Id) from %s where Id >'%s')", table_name,table_name, key_copy); - free(key_copy); - break; - case MDHIM_FLOAT_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s where Id >%f)", table_name,table_name, *((float*)old_key)); - break; - case MDHIM_DOUBLE_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s where Id >%lf)", table_name,table_name, *((double*)old_key)); - break; - case MDHIM_INT_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s where Id >%d)", table_name,table_name, *((int*)old_key)); - break; - case MDHIM_LONG_INT_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s where Id >%ld)", table_name,table_name, *((long*)old_key)); - break; - case MDHIM_BYTE_KEY: - snprintf(get_next, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select min(Id) from %s where Id > '%s')", table_name,table_name, key_copy); - free(key_copy); - break; - } - //snprintf(get_next, "Select * From %s where Id = (Select min(Id) from %s where Id >%d)", table_name, table_name, *(int*)old_key); - if(mysql_query(db, get_next)) { - mlog(MDHIM_SERVER_DBG2, "Could not get the next key/value"); - goto error; - } - } - - - //STore the result, you MUST use mysql_store_result because of it being parallel - key_result = mysql_store_result(db); - - if (key_result->row_count == 0) { - 
mlog(MDHIM_SERVER_DBG2, "Could not get mysql result"); - goto error; - } - key_row = mysql_fetch_row(key_result); - unsigned long *dl = mysql_fetch_lengths(key_result); - int r_size; - msl_key = str_to_key(key_row, x->msqkt, &r_size); - *key_len = r_size; - *data_len = dl[1]; - msl_data = key_row[1]; - - //Allocate data and key to mdhim program - if (key_row && *key_row) { - *key = malloc(*key_len+1); - memset(*key, 0, *key_len+1); - memcpy(*key, msl_key, *key_len); - *data = malloc(*data_len+1); - memset(*data, 0, *data_len+1); - memcpy(*data, msl_data, *data_len+1); - //printf("\nCopied here\n"); - - } else { - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - printf("\nNot Copied here\n"); - } - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to get the next record", - (int) (end.tv_sec - start.tv_sec)); - - - return ret; - -error: - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - return MDHIM_DB_ERROR; - -} - - -///////////////////////////////////////////////////////////////////////////////////////// -/** - * mdhim_mysql_get_prev - * Gets the previous key/value from the data store - * - * @param dbh in pointer to the unqlite db handle - * @param key out void ** to the key that we get - * @param key_len out int * to the length of the key - * @param data out void ** to the value belonging to the key - * @param data_len out int * to the length of the value data - * @param mstore_opts in additional cursor options for the data store layer - * - */ - -int mdhim_mysql_get_prev(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len){ - struct MDI *x = (struct MDI *)(dbh); - MYSQL *db = x->msqdb; - if (db == NULL) - { - fprintf(stderr, "%s\n", mysql_error(db)); - return MDHIM_DB_ERROR; - } - int ret = MDHIM_SUCCESS; - void *old_key; - struct timeval start, end; - int key_lg; - if (!key_len){ key_lg = 0; *key = NULL;} - else key_lg = *key_len; - - char get_prev[MYSQL_BUFFER+key_lg]; - MYSQL_RES 
*key_result; - MYSQL_ROW key_row; - void *msl_data; - void *msl_key; - char *table_name; - //Init the data to return - gettimeofday(&start, NULL); - old_key = *key; - char *key_copy; - if (x->msqkt == MDHIM_STRING_KEY || - x->msqkt == MDHIM_BYTE_KEY) { - if (old_key) key_copy = sb_key_copy(db, (char*)old_key, *key_len); - } - //Start with Keys/data being null - if (key_len) { - *key = NULL; - *key_len = 0; - } - else{ - *key = NULL; - key_len = &key_lg; - } - *data = NULL; - *data_len = 0; - - table_name = x->table; - - //Get the Key/Value from the tables and if there was no old key, use the last one. - - if (!old_key){ - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * from %s where Id = (Select max(Id) From %s)", table_name,table_name); - if(mysql_query(db, get_prev)) { - mlog(MDHIM_SERVER_DBG2, "Could not get the previous key/value"); - goto error; - } - - } else { - - switch(x->msqkt){ - case MDHIM_STRING_KEY: - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len), "Select * From %s where Id = (Select max(Id) from %s where Id < '%s')", table_name,table_name, key_copy); - free(key_copy); - break; - case MDHIM_FLOAT_KEY: - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select max(Id) from %s where Id <%f)", table_name,table_name, *((float*)old_key)); - break; - case MDHIM_DOUBLE_KEY: - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select max(Id) from %s where Id <%lf)", table_name,table_name, *((double*)old_key)); - break; - case MDHIM_INT_KEY: - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select max(Id) from %s where Id <%d)", table_name,table_name, *((int*)old_key)); - break; - case MDHIM_LONG_INT_KEY: - snprintf(get_prev, sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select max(Id) from %s where Id <%ld)", table_name,table_name, *((long*)old_key)); - break; - case MDHIM_BYTE_KEY: - snprintf(get_prev, 
sizeof(char)*(MYSQL_BUFFER+*key_len),"Select * From %s where Id = (Select max(Id) from %s where Id < '%s')", table_name,table_name, key_copy); - free(key_copy); - break; - } - - - //Query the database - if(mysql_query(db, get_prev)) { - mlog(MDHIM_SERVER_DBG2, "Could not get the previous key/value"); - goto error; - } - } - //STore the result, you MUST use mysql_store_result because of it being parallel - key_result = mysql_store_result(db); - - if (key_result->row_count == 0) { - mlog(MDHIM_SERVER_DBG2, "Could not get mysql result"); - goto error; - } - //Fetch row and get data from database - key_row = mysql_fetch_row(key_result); - unsigned long *dl = mysql_fetch_lengths(key_result); - int r_size; - msl_key = str_to_key(key_row, x->msqkt, &r_size); - *key_len = r_size; - *data_len = dl[1]; - msl_data = key_row[1]; - - //Allocate data and key to mdhim program - if (key_row && *key_row) { - *key = malloc(*key_len+1); - memset(*key, 0, *key_len+1); - memcpy(*key, msl_key, *key_len); - *data = malloc(*data_len); - memset(*data, 0, *data_len); - memcpy(*data, msl_data, *data_len); - //printf("\nCopied here\n"); - - } else { - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - } - - mysql_free_result(key_result); - //End timing - gettimeofday(&end, NULL); - mlog(MDHIM_SERVER_DBG, "Took: %d seconds to get the prev record", - (int) (end.tv_sec - start.tv_sec)); - return ret; -error: - *key = NULL; - *key_len = 0; - *data = NULL; - *data_len = 0; - return MDHIM_DB_ERROR; -} - - -int mdhim_mysql_commit(void *dbh) { - return MDHIM_SUCCESS; -} diff --git a/meta/src/ds_mysql.h b/meta/src/ds_mysql.h deleted file mode 100644 index 15d87c441..000000000 --- a/meta/src/ds_mysql.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. 
- * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include "mdhim.h" -#include "data_store.h" - -//Struct for MYSQL Handling -//MDI = Mysql Database Info -//Too lazy to Edit name -struct MDI { - MYSQL *msqdb; //Database connection - int msqht; //Handle's specfication, whether it's the original or stat - int msqkt; - char *host; - char *user; - char *pswd; - char *table; - char *database; -}; - -int mdhim_mysql_open(void **dbh, void **dbs, char *path, int flags, int key_type, struct mdhim_options_t *opts); -int mdhim_mysql_put(void *dbh, void *key, int key_len, void *data, int32_t data_len); -int mdhim_mysql_get(void *dbh, void *key, int key_len, void **data, int32_t *data_len); -int mdhim_mysql_get_next(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len); -int mdhim_mysql_get_prev(void *dbh, void **key, int *key_len, - void **data, int32_t *data_len); -int mdhim_mysql_close(void *dbh, void *dbs); -int mdhim_mysql_del(void *dbh, void *key, int key_len); -int mdhim_mysql_commit(void *dbh); -int mdhim_mysql_batch_put(void *dbh, void **key, int32_t *key_lens, - void **data, int32_t *data_lens, int 
num_record); diff --git a/meta/src/indexes.c b/meta/src/indexes.c deleted file mode 100644 index 0721a3cf8..000000000 --- a/meta/src/indexes.c +++ /dev/null @@ -1,1776 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mdhim.h" -#include "indexes.h" - -struct timeval hashopstart, hashopend; -double hashoptime=0; - -struct timeval cmpstart, cmpend; -double cmptime=0; - -struct timeval metastart, metaend; -double metatime=0; - -struct timeval sleepstart, sleepend; -double sleeptime=0; -/** - * to_lower - * convert strings to all lower case - * - */ -void to_lower(size_t in_length, char *in, char *out) { - memset(out, 0, in_length); - - // Make sure that the name passed is lowercase - int i=0; - for(i=0; i < in_length; i++) { - out[i] = tolower(in[i]); - } -} - -/** - * im_range_server - * checks if I'm a range server - * - * @param md Pointer to the main MDHIM structure - * @return 0 if false, 1 if true - */ - -int im_range_server(struct index_t *index) { - if (index->myinfo.rangesrv_num > 0) { - return 1; - } - - return 0; -} - -/** - * open_manifest - * Opens the manifest file - * - * @param md Pointer to the main MDHIM structure - * @param flags Flags to open the file with - */ -int open_manifest(struct mdhim_t *md, struct index_t *index, int flags) { - int fd; - char path[PATH_MAX]; - - sprintf(path, "%s%d_%d_%d", md->db_opts->manifest_path, index->type, - index->id, md->mdhim_rank); - fd = open(path, flags, 00600); - if (fd < 0) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error opening manifest file", - md->mdhim_rank); - } - - return fd; -} - -/** - * write_manifest - * Writes out the manifest file - * - * @param md Pointer to the main MDHIM structure - */ -void write_manifest(struct mdhim_t *md, struct index_t *index) { - index_manifest_t manifest; - int fd; - int ret; - - //Range server with range server number 1, for the primary index, is in charge of the manifest - if (index->type != LOCAL_INDEX && - (index->myinfo.rangesrv_num != 1)) { - return; - } - - if ((fd = open_manifest(md, index, O_RDWR | O_CREAT | O_TRUNC)) < 0) { - 
mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error opening manifest file", - md->mdhim_rank); - return; - } - - //Populate the manifest structure - manifest.num_rangesrvs = index->num_rangesrvs; - manifest.key_type = index->key_type; - manifest.db_type = index->db_type; - manifest.rangesrv_factor = index->range_server_factor; - manifest.slice_size = index->mdhim_max_recs_per_slice; - manifest.num_nodes = md->mdhim_comm_size; - - if ((ret = write(fd, &manifest, sizeof(manifest))) < 0) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error writing manifest file", - md->mdhim_rank); - } - - close(fd); -} - -/** - * read_manifest - * Reads in and validates the manifest file - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int read_manifest(struct mdhim_t *md, struct index_t *index) { - int fd; - int ret; - index_manifest_t manifest; - - if ((fd = open_manifest(md, index, O_RDWR)) < 0) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Couldn't open manifest file", - md->mdhim_rank); - return MDHIM_SUCCESS; - } - - if ((ret = read(fd, &manifest, sizeof(manifest))) < 0) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Couldn't read manifest file", - md->mdhim_rank); - return MDHIM_ERROR; - } - - ret = MDHIM_SUCCESS; - mlog(MDHIM_SERVER_DBG, "Rank: %d - Manifest contents - \nnum_rangesrvs: %d, key_type: %d, " - "db_type: %d, rs_factor: %u, slice_size: %lu, num_nodes: %d", - md->mdhim_rank, manifest.num_rangesrvs, manifest.key_type, manifest.db_type, - manifest.rangesrv_factor, manifest.slice_size, manifest.num_nodes); - - //Check that the manifest and the current config match - if (manifest.key_type != index->key_type) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - The key type in the manifest file" - " doesn't match the current key type", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - if (manifest.db_type != index->db_type) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - The database type in the manifest file" - " doesn't match the current database type", - 
md->mdhim_rank); - ret = MDHIM_ERROR; - } - - if (manifest.rangesrv_factor != index->range_server_factor) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - The range server factor in the manifest file" - " doesn't match the current range server factor", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - if (manifest.slice_size != index->mdhim_max_recs_per_slice) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - The slice size in the manifest file" - " doesn't match the current slice size", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - if (manifest.num_nodes != md->mdhim_comm_size) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - The number of nodes in this MDHIM instance" - " doesn't match the number used previously", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - - close(fd); - return ret; -} - -/** - * update_stat - * Adds or updates the given stat to the hash table - * - * @param md pointer to the main MDHIM structure - * @param key pointer to the key we are examining - * @param key_len the key's length - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int update_stat(struct mdhim_t *md, struct index_t *index, void *key, uint32_t key_len) { - int slice_num; - void *val1, *val2; - int float_type = 0; - struct mdhim_stat *os, *stat; - - //Acquire the lock to update the stats - gettimeofday(&sleepstart, NULL); - while (pthread_rwlock_wrlock(index->mdhim_store->mdhim_store_stats_lock) == EBUSY) { - usleep(10); - } - gettimeofday(&sleepend, NULL); - sleeptime += 1000000*(sleepend.tv_sec-sleepstart.tv_sec)+sleepend.tv_usec-sleepstart.tv_usec; - - gettimeofday(&metastart, NULL); - if ((float_type = is_float_key(index->key_type)) == 1) { - val1 = (void *) malloc(sizeof(long)); - val2 = (void *) malloc(sizeof(long)); - /* printf("is float key\n"); - fflush(stdout); - */ - } else if (index->key_type != MDHIM_UNIFYFS_KEY){ - val1 = (void *) malloc(sizeof(uint64_t)); - val2 = (void *) malloc(sizeof(uint64_t)); - /* printf("is not unifyfs key\n"); - fflush(stdout); - */ - } - else { - val1 = NULL; - 
val2 = NULL; - /* - printf("is unifyfs key\n"); - fflush(stdout); - */ - } - /* - printf("key_type is %d\n", index->key_type); - fflush(stdout); - */ - if (index->key_type == MDHIM_STRING_KEY) { - *(long double *)val1 = get_str_num(key, key_len); - *(long double *)val2 = *(long double *)val1; - } else if (index->key_type == MDHIM_FLOAT_KEY) { - *(long double *)val1 = *(float *) key; - *(long double *)val2 = *(float *) key; - } else if (index->key_type == MDHIM_DOUBLE_KEY) { - *(long double *)val1 = *(double *) key; - *(long double *)val2 = *(double *) key; - } else if (index->key_type == MDHIM_INT_KEY) { - *(uint64_t *)val1 = *(uint32_t *) key; - *(uint64_t *)val2 = *(uint32_t *) key; - } else if (index->key_type == MDHIM_LONG_INT_KEY) { - *(uint64_t *)val1 = *(uint64_t *) key; - *(uint64_t *)val2 = *(uint64_t *) key; - } else if (index->key_type == MDHIM_BYTE_KEY) { - *(unsigned long *)val1 = get_byte_num(key, key_len); - *(unsigned long *)val2 = *(unsigned long *)val1; - } else if (index->key_type == MDHIM_UNIFYFS_KEY) { - val1 = copy_unifyfs_key(key, key_len); - val2 = copy_unifyfs_key(key, key_len); - } - gettimeofday(&metaend, NULL); - metatime+=1000000*(metaend.tv_sec-metastart.tv_sec)+metaend.tv_usec-metastart.tv_usec; - - slice_num = get_slice_num(md, index, key, key_len); - gettimeofday(&hashopstart, NULL); - HASH_FIND_INT(index->mdhim_store->mdhim_store_stats, &slice_num, os); - gettimeofday(&hashopend, NULL); - - - // printf("val1 is %ld, val2 is %ld\n", *((long *)val1+1), *((long *)val2+1)); - stat = malloc(sizeof(struct mdhim_stat)); - stat->min = val1; - stat->max = val2; - stat->num = 1; - stat->key = slice_num; - stat->dirty = 1; - hashoptime += 1000000*(hashopend.tv_sec-hashopstart.tv_sec)+hashopend.tv_usec-hashopstart.tv_usec; -// printf("here min stat is %ld, max stat is %ld\n", *((long *)stat->min), *((long *)stat->max)); -// fflush(stdout); - - gettimeofday(&cmpstart, NULL); - if (index->key_type == MDHIM_UNIFYFS_KEY && os ) { - if 
(unifyfs_compare(os->min, val1) > 0) { - /* - printf("freeing %x, va1 addr is %x\n", os->min, val1); - fflush(stdout); - */ - free(os->min); - stat->min = val1; - } else { - stat->min = os->min; - free(val1); - } - - if (unifyfs_compare(os->max, val2) < 0) { - /* - printf("freeing %xb,bbb\n", os->max); - fflush(stdout); - */ - free(os->max); - stat->max = val2; - } else { - stat->max = os->max; - free(val2); - } - } - gettimeofday(&cmpend, NULL); - cmptime+=1000000*(cmpend.tv_sec-cmpstart.tv_sec)+cmpend.tv_usec-cmpstart.tv_usec; - - if (float_type && os) { -// printf("comparing min is %ld, val1 is %ld\n", *((long *)stat->min), *((long *)val1)); -// fflush(stdout); - if (*(unsigned long *)os->min > *(unsigned long *)val1) { - /* - printf("freeing %x 1\n", os->min); - fflush(stdout); - */ - free(os->min); - stat->min = val1; - } else { - /* - printf("freeing %x 1\n", val1); - fflush(stdout); - */ - free(val1); - stat->min = os->min; - } - - if (*(unsigned long *)os->max < *(unsigned long *)val2) { - /* - printf("freeing %x2\n", os->max); - fflush(stdout); - */ - free(os->max); - stat->max = val2; - } else { - /* - printf("freeing %x2\n", val2); - fflush(stdout); - */ - free(val2); - stat->max = os->max; - } - } - - if (!float_type && os && index->key_type != MDHIM_UNIFYFS_KEY) { - if (*(uint64_t *)os->min > *(uint64_t *)val1) { - /* - printf("freeing %x3\n", os->min); - fflush(stdout); - */ - free(os->min); - stat->min = val1; - } else { - /* - printf("freeing %x3\n", val1); - fflush(stdout); - */ - free(val1); - stat->min = os->min; - } - - if (*(uint64_t *)os->max < *(uint64_t *)val2) { - /* - printf("freeing %x4\n", os->max); - fflush(stdout); - */ - free(os->max); - stat->max = val2; - } else { - /* - printf("freeing %x4\n", val2); - fflush(stdout); - */ - free(val2); - stat->max = os->max; - } - } - - if (!os) { - gettimeofday(&hashopstart, NULL); - HASH_ADD_INT(index->mdhim_store->mdhim_store_stats, key, stat); - gettimeofday(&hashopend, NULL); - hashoptime += 
1000000*(hashopend.tv_sec-hashopstart.tv_sec)+hashopend.tv_usec-hashopstart.tv_usec; - } else { - stat->num = os->num + 1; - - //Replace the existing stat - gettimeofday(&hashopstart, NULL); - HASH_REPLACE_INT(index->mdhim_store->mdhim_store_stats, key, stat, os); - free(os); - gettimeofday(&hashopend, NULL); - hashoptime += 1000000*(hashopend.tv_sec-hashopstart.tv_sec)+hashopend.tv_usec-hashopstart.tv_usec; - } - - //Release the stats lock - gettimeofday(&sleepstart, NULL); - pthread_rwlock_unlock(index->mdhim_store->mdhim_store_stats_lock); - gettimeofday(&sleepend, NULL); - sleeptime +=1000000*(sleepend.tv_sec-sleepstart.tv_sec)+sleepend.tv_usec-sleepstart.tv_usec; - return MDHIM_SUCCESS; -} - -/** - * load_stats - * Loads the statistics from the database - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int load_stats(struct mdhim_t *md, struct index_t *index) { - void **val; - int *val_len, *key_len; - int **slice; - int *old_slice; - struct mdhim_stat *stat; - int float_type = 0; - void *min, *max; - int done = 0; - - float_type = is_float_key(index->key_type); - slice = malloc(sizeof(int *)); - *slice = NULL; - key_len = malloc(sizeof(int)); - *key_len = sizeof(int); - val = malloc(sizeof(struct mdhim_db_stat *)); - val_len = malloc(sizeof(int)); - old_slice = NULL; - index->mdhim_store->mdhim_store_stats = NULL; - while (!done) { - //Check the db for the key/value - *val = NULL; - *val_len = 0; - index->mdhim_store->get_next(index->mdhim_store->db_stats, - (void **) slice, key_len, (void **) val, - val_len); - - //Add the stat to the hash table - the value is 0 if the key was not in the db - if (!*val || !*val_len) { - done = 1; - continue; - } - - if (old_slice) { - free(old_slice); - old_slice = NULL; - } - - mlog(MDHIM_SERVER_DBG, "Rank: %d - Loaded stat for slice: %d with " - "imin: %lu and imax: %lu, dmin: %Lf, dmax: %Lf, and num: %lu", - md->mdhim_rank, **slice, (*(struct mdhim_db_stat 
**)val)->imin, - (*(struct mdhim_db_stat **)val)->imax, (*(struct mdhim_db_stat **)val)->dmin, - (*(struct mdhim_db_stat **)val)->dmax, (*(struct mdhim_db_stat **)val)->num); - - stat = malloc(sizeof(struct mdhim_stat)); - if (float_type) { - min = (void *) malloc(sizeof(long double)); - max = (void *) malloc(sizeof(long double)); - *(long double *)min = (*(struct mdhim_db_stat **)val)->dmin; - *(long double *)max = (*(struct mdhim_db_stat **)val)->dmax; - } else { - min = (void *) malloc(sizeof(uint64_t)); - max = (void *) malloc(sizeof(uint64_t)); - *(uint64_t *)min = (*(struct mdhim_db_stat **)val)->imin; - *(uint64_t *)max = (*(struct mdhim_db_stat **)val)->imax; - } - - stat->min = min; - stat->max = max; - stat->num = (*(struct mdhim_db_stat **)val)->num; - stat->key = **slice; - stat->dirty = 0; - old_slice = *slice; - HASH_ADD_INT(index->mdhim_store->mdhim_store_stats, key, stat); - free(*val); - } - - if (old_slice) { - free(old_slice); - } - free(val); - free(val_len); - free(key_len); - free(*slice); - free(slice); - return MDHIM_SUCCESS; -} - -/** - * write_stats - * Writes the statistics stored in a hash table to the database - * This is done on mdhim_close - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int write_stats(struct mdhim_t *md, struct index_t *bi) { - struct mdhim_stat *stat, *tmp; - struct mdhim_db_stat *dbstat; - int float_type = 0; - - float_type = is_float_key(bi->key_type); - - //Iterate through the stat hash entries - HASH_ITER(hh, bi->mdhim_store->mdhim_store_stats, stat, tmp) { - if (!stat) { - continue; - } - - if (!stat->dirty) { - goto free_stat; - } - - dbstat = malloc(sizeof(struct mdhim_db_stat)); - if (float_type) { - dbstat->dmax = *(long double *)stat->max; - dbstat->dmin = *(long double *)stat->min; - dbstat->imax = 0; - dbstat->imin = 0; - } else { - dbstat->imax = *(uint64_t *)stat->max; - dbstat->imin = *(uint64_t *)stat->min; - dbstat->dmax = 0; - 
dbstat->dmin = 0; - } - - dbstat->slice = stat->key; - dbstat->num = stat->num; - //Write the key to the database - bi->mdhim_store->put(bi->mdhim_store->db_stats, - &dbstat->slice, sizeof(int), dbstat, - sizeof(struct mdhim_db_stat)); - //Delete and free hash entry - free(dbstat); - - free_stat: - HASH_DEL(bi->mdhim_store->mdhim_store_stats, stat); - free(stat->max); - free(stat->min); - free(stat); - } - - return MDHIM_SUCCESS; -} - -/** - * open_db_store - * Opens the database for the given idenx - * - * @param md Pointer to the main MDHIM structure - * @param index Pointer to the index - * @return the initialized data store or NULL on error - */ - -int open_db_store(struct mdhim_t *md, struct index_t *index) { - char filename[PATH_MAX] = {'\0'}; - int flags = MDHIM_CREATE; - int path_num; - int ret; - - //Database filename is dependent on ranges. This needs to be configurable and take a prefix - if (!md->db_opts->db_paths) { - sprintf(filename, "%s/%s-%d-%d", md->db_opts->db_path, - md->db_opts->db_name, index->id, md->mdhim_rank); - } else { - path_num = index->myinfo.rangesrv_num/((double) index->num_rangesrvs/(double) md->db_opts->num_paths); - path_num = path_num >= md->db_opts->num_paths ? 
md->db_opts->num_paths - 1 : path_num; - if (path_num < 0) { - sprintf(filename, "%s/%s-%d-%d", md->db_opts->db_path, - md->db_opts->db_name, - index->id, md->mdhim_rank); - } else { - sprintf(filename, "%s/%s-%d-%d", - md->db_opts->db_paths[path_num], - md->db_opts->db_name, index->id, - md->mdhim_rank); - } - } - - //Initialize data store - index->mdhim_store = mdhim_db_init(index->db_type); - if (!index->mdhim_store) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing data store with file: %s", - md->mdhim_rank, - filename); - return MDHIM_ERROR; - } - - //Open the main database and the stats database - if ((ret = index->mdhim_store->open(&index->mdhim_store->db_handle, - &index->mdhim_store->db_stats, - filename, flags, index->key_type, md->db_opts)) != MDHIM_SUCCESS){ - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while opening database", - md->mdhim_rank); - return MDHIM_ERROR; - } - - //Load the stats from the database - if ((ret = load_stats(md, index)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while loading stats", - md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * get_num_range_servers - * Gets the number of range servers for an index - * - * @param md main MDHIM struct - * @param rindex pointer to a index_t struct - * @return MDHIM_ERROR on error, otherwise the number of range servers - */ -uint32_t get_num_range_servers(struct mdhim_t *md, struct index_t *rindex) { - int size; - uint32_t num_servers = 0; - int i = 0; - int ret; - - if ((ret = MPI_Comm_size(md->mdhim_comm, &size)) != MPI_SUCCESS) { - mlog(MPI_EMERG, "Rank: %d - Couldn't get the size of the comm in get_num_range_servers", - md->mdhim_rank); - return MDHIM_ERROR; - } - - /* Get the number of range servers */ - if (size - 1 < rindex->range_server_factor) { - //The size of the communicator is less than the RANGE_SERVER_FACTOR - return 1; - } - - //Figure out the number of range servers, details 
on the algorithm are in is_range_server - for (i = 0; i < size; i++) { - if (i % rindex->range_server_factor == 0) { - num_servers++; - } - } - - return num_servers; -} - -/** - * create_local_index - * Creates an index on the primary index that is handled by the same servers as the primary index. - * This index does not have global ordering. Ordering is local to the range server only. - * Retrieving a key from this index will require querying multiple range servers simultaneously. - * - * @param md main MDHIM struct - * @return MDHIM_ERROR on error, otherwise the index identifier - */ -struct index_t *create_local_index(struct mdhim_t *md, int db_type, int key_type, char *index_name) { - struct index_t *li; - struct index_t *check = NULL; - uint32_t rangesrv_num; - int ret; - - MPI_Barrier(md->mdhim_client_comm); - - //Check that the key type makes sense - if (key_type < MDHIM_INT_KEY || key_type > MDHIM_UNIFYFS_KEY) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM - Invalid key type specified"); - return NULL; - } - - //Acquire the lock to update indexes - while (pthread_rwlock_wrlock(md->indexes_lock) == EBUSY) { - usleep(10); - } - - //Create a new global_index to hold our index entry - li = malloc(sizeof(struct index_t)); - if (!li) { - goto done; - } - - //Initialize the new index struct - memset(li, 0, sizeof(struct index_t)); - li->id = HASH_COUNT(md->indexes); - li->range_server_factor = md->primary_index->range_server_factor; - li->mdhim_max_recs_per_slice = MDHIM_MAX_SLICES; - li->type = LOCAL_INDEX; - li->key_type = key_type; - li->db_type = db_type; - li->myinfo.rangesrv_num = 0; - li->myinfo.rank = md->mdhim_rank; - li->primary_id = md->primary_index->id; - li->stats = NULL; - - if (index_name != NULL) { - size_t name_len = strlen(index_name)+1; - char *lower_name = malloc(name_len); - - to_lower(name_len, index_name, lower_name); - - // check if the name has been used - HASH_FIND_STR(md->indexes, lower_name, check); - if(check) { - goto done; - } - - li->name = 
malloc(name_len); - memcpy(li->name, lower_name, name_len); - - } else { - char buf[50]; - sprintf(buf, "local_%d", li->id); - li->name = malloc(sizeof(char)*strlen(buf)); - strcpy(li->name, buf); - } - - - //Figure out how many range servers we could have based on the range server factor - li->num_rangesrvs = get_num_range_servers(md, li); - - //Get the range servers for this index - ret = get_rangesrvs(md, li); - if (ret != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Couldn't get the range server list", - md->mdhim_rank); - } - - //Add it to the hash table - HASH_ADD_INT(md->indexes, id, li); - HASH_ADD_KEYPTR( hh_name, md->indexes_by_name, li->name, strlen(li->name), li ); - - //Test if I'm a range server and get the range server number - if ((rangesrv_num = is_range_server(md, md->mdhim_rank, li)) == MDHIM_ERROR) { - goto done; - } - - if (rangesrv_num > 0) { - //Populate my range server info for this index - li->myinfo.rank = md->mdhim_rank; - li->myinfo.rangesrv_num = rangesrv_num; - } - - //If not a range server, our work here is done - if (!rangesrv_num) { - goto done; - } - - //Read in the manifest file if the rangesrv_num is 1 for the primary index - if (rangesrv_num == 1 && - (ret = read_manifest(md, li)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error: There was a problem reading or validating the manifest file", - md->mdhim_rank); - MPI_Abort(md->mdhim_comm, 0); - } - - //Open the data store - ret = open_db_store(md, (struct index_t *) li); - if (ret != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error opening data store for index: %d", - md->mdhim_rank, li->id); - MPI_Abort(md->mdhim_comm, 0); - } - - //Initialize the range server threads if they haven't been already - if (!md->mdhim_rs) { - ret = range_server_init(md); - } - -done: - //Release the indexes lock - if (pthread_rwlock_unlock(md->indexes_lock) != 0) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error unlocking the indexes_lock", - 
md->mdhim_rank); - return NULL; - } - - if (!li) { - return NULL; - } - - // The index name has already been taken - if(check) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error creating index: Name %s, already exists", md->mdhim_rank, index_name); - return NULL; - } - - return li; -} - -/** - * create_global_index - * Collective call that creates a global index. - * A global index has global ordering. This means that range servers serve mutually exclusive keys - * and keys can be retrieved across servers in order. Retrieving a key will query only one range - * server. - * - * @param md main MDHIM struct - * @param server_factor used in calculating the number of range servers - * @param max_recs_per_slice the number of records per slice - * @return MDHIM_ERROR on error, otherwise the index identifier - */ - -struct index_t *create_global_index(struct mdhim_t *md, int server_factor, - uint64_t max_recs_per_slice, - int db_type, int key_type, char *index_name) { - struct index_t *gi; - struct index_t *check = NULL; - uint32_t rangesrv_num; - int ret; - - MPI_Barrier(md->mdhim_client_comm); - //Check that the key type makes sense - if (key_type < MDHIM_INT_KEY || key_type > MDHIM_UNIFYFS_KEY) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM - Invalid key type specified"); - return NULL; - } - - //Acquire the lock to update indexes - while (pthread_rwlock_wrlock(md->indexes_lock) == EBUSY) { - usleep(10); - } - - //Create a new global_index to hold our index entry - gi = malloc(sizeof(struct index_t)); - if (!gi) { - goto done; - } - - //Initialize the new index struct - memset(gi, 0, sizeof(struct index_t)); - gi->id = HASH_COUNT(md->indexes); - gi->range_server_factor = server_factor; - gi->mdhim_max_recs_per_slice = max_recs_per_slice; - gi->type = gi->id > 0 ? SECONDARY_INDEX : PRIMARY_INDEX; - gi->key_type = key_type; - gi->db_type = db_type; - gi->myinfo.rangesrv_num = 0; - gi->myinfo.rank = md->mdhim_rank; - gi->primary_id = gi->type == SECONDARY_INDEX ? 
md->primary_index->id : -1; - gi->stats = NULL; - - if (gi->id > 0) { - - if (index_name != NULL) { - - size_t name_len = strlen(index_name)+1; - char *lower_name = malloc(name_len); - - to_lower(name_len, index_name, lower_name); - - // check if the name has been used - HASH_FIND_STR(md->indexes, lower_name, check); - if(check) { - goto done; - } - - gi->name = malloc(name_len); - memcpy(gi->name, lower_name, name_len); - - } else { - char buf[50]; - sprintf(buf, "global_%d", gi->id); - gi->name = malloc(sizeof(char)*strlen(buf)); - strcpy(gi->name, buf); - } - - } else { - gi->name = malloc(sizeof(char)*10); - strcpy(gi->name, "primary"); - } - - //Figure out how many range servers we could have based on the range server factor - gi->num_rangesrvs = get_num_range_servers(md, gi); - //Get the range servers for this index - ret = get_rangesrvs(md, gi); - if (ret != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Couldn't get the range server list", - md->mdhim_rank); - } - - //Add it to the hash table - HASH_ADD_INT(md->indexes, id, gi); - HASH_ADD_KEYPTR( hh_name, md->indexes_by_name, gi->name, strlen(gi->name), gi ); - - //Test if I'm a range server and get the range server number - if ((rangesrv_num = is_range_server(md, md->mdhim_rank, gi)) == MDHIM_ERROR) { - goto done; - } - - if (rangesrv_num > 0) { - //Populate my range server info for this index - gi->myinfo.rank = md->mdhim_rank; - gi->myinfo.rangesrv_num = rangesrv_num; - } - - //Initialize the communicator for this index - if ((ret = index_init_comm(md, gi)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error creating the index communicator", - md->mdhim_rank); - goto done; - } - //If not a range server, our work here is done - if (!rangesrv_num) { - goto done; - } - - //Read in the manifest file if the rangesrv_num is 1 for the primary index - if (rangesrv_num == 1 && - (ret = read_manifest(md, gi)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error: There was 
a problem reading or validating the manifest file", - md->mdhim_rank); - MPI_Abort(md->mdhim_comm, 0); - } - - //Open the data store - ret = open_db_store(md, (struct index_t *) gi); - if (ret != MDHIM_SUCCESS) { - // printf("open store failed with error %d, rank%d\n", ret, md->mdhim_rank); - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error opening data store for index: %d", - md->mdhim_rank, gi->id); - } - - //Initialize the range server threads if they haven't been already - - if (!md->mdhim_rs) { - ret = range_server_init(md); - } - -done: - //Release the indexes lock - if (pthread_rwlock_unlock(md->indexes_lock) != 0) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error unlocking the indexes_lock", - md->mdhim_rank); - return NULL; - } - - if (!gi) { - return NULL; - } - - // The index name has already been taken - if(check) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error creating index: Name %s, already exists", md->mdhim_rank, index_name); - return NULL; - } - - return gi; -} - -/** - * get_rangesrvs - * Creates a rangesrv_info hash table - * - * @param md in main MDHIM struct - * @return a list of range servers - */ -int get_rangesrvs(struct mdhim_t *md, struct index_t *index) { - struct rangesrv_info *rs_entry_num, *rs_entry_rank; - uint32_t rangesrv_num; - int i; - - //Iterate through the ranks to determine which ones are range servers - for (i = 0; i < md->mdhim_comm_size; i++) { - //Test if the rank is range server for this index - if ((rangesrv_num = is_range_server(md, i, index)) == MDHIM_ERROR) { - continue; - } - - if (!rangesrv_num) { - continue; - } - - //Set the master range server to be the server with the largest rank - if (i > index->rangesrv_master) { - index->rangesrv_master = i; - } - - rs_entry_num = malloc(sizeof(struct rangesrv_info)); - rs_entry_rank = malloc(sizeof(struct rangesrv_info)); - rs_entry_num->rank = rs_entry_rank->rank = i; - rs_entry_rank->rangesrv_num = rs_entry_num->rangesrv_num = rangesrv_num; -// printf("range server rank is %d, range 
server num is %d, myrank is %d\n", rs_entry_rank->rank, rs_entry_num->rangesrv_num, md->mdhim_rank); -// fflush(stdout); - //Add it to the hash tables - HASH_ADD_INT(index->rangesrvs_by_num, rangesrv_num, rs_entry_num); - HASH_ADD_INT(index->rangesrvs_by_rank, rank, rs_entry_rank); - } - - return MDHIM_SUCCESS; -} - -/** - * is_range_server - * Tests to see if the given rank is a range server for one or more indexes - * - * @param md main MDHIM struct - * @param rank rank to find out if it is a range server - * @return MDHIM_ERROR on error, 0 on false, 1 or greater to represent the range server number otherwise - */ -uint32_t is_range_server(struct mdhim_t *md, int rank, struct index_t *index) { - int size; - int ret; - uint64_t rangesrv_num = 0; - - //If a local index, check to see if the rank is a range server for the primary index - if (index->type == LOCAL_INDEX) { - rangesrv_num = is_range_server(md, rank, md->primary_index); - - return rangesrv_num; - } - - if ((ret = MPI_Comm_size(md->mdhim_comm, &size)) != MPI_SUCCESS) { - mlog(MPI_EMERG, "Rank: %d - Couldn't get the size of the comm in is_range_server", - md->mdhim_rank); - return MDHIM_ERROR; - } - - /* Get the range server number, which is just a number from 1 onward - It represents the ranges the server serves and is calculated with the RANGE_SERVER_FACTOR - - The RANGE_SERVER_FACTOR is a number that is divided by the rank such that if the - remainder is zero, then the rank is a rank server - - For example, if there were 8 ranks and the RANGE_SERVER_FACTOR is 2, - then ranks: 0, 2, 4, 6 are range servers - - If the size of communicator is less than the RANGE_SERVER_FACTOR, - the last rank is the range server - */ - - size -= 1; - if (size < index->range_server_factor && rank == size) { - //The size of the communicator is less than the RANGE_SERVER_FACTOR - rangesrv_num = 1; - - } else if (size >= index->range_server_factor && rank % index->range_server_factor == 0) { - //This is a range server, get the 
range server's number - rangesrv_num = rank / index->range_server_factor; - rangesrv_num++; - } - - if (rangesrv_num > index->num_rangesrvs) { - rangesrv_num = 0; - } -// printf("rangeser_num is %d, rank is %d\n", rangesrv_num, md->mdhim_rank); - return rangesrv_num; -} - -/** - * range_server_init_comm - * Initializes the range server communicator that is used for range server to range - * server collectives - * The stat flush function will use this communicator - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int index_init_comm(struct mdhim_t *md, struct index_t *bi) { - MPI_Group orig, new_group; - int *ranks; - int i = 0; - int ret; - int comm_size, size; - MPI_Comm new_comm; - struct rangesrv_info *rangesrv, *tmp; - - ranks = NULL; - size = 0; - //Populate the ranks array that will be in our new comm - if ((ret = im_range_server(bi)) == 1) { - ranks = malloc(sizeof(int) * bi->num_rangesrvs); - //Iterate through the stat hash entries - HASH_ITER(hh, bi->rangesrvs_by_rank, rangesrv, tmp) { - if (!rangesrv) { - continue; - } - - ranks[size] = rangesrv->rank; - size++; - } - } else { - MPI_Comm_size(md->mdhim_comm, &comm_size); - ranks = malloc(sizeof(int) * comm_size); - for (i = 0; i < comm_size; i++) { - HASH_FIND_INT(bi->rangesrvs_by_rank, &i, rangesrv); - if (rangesrv) { - continue; - } - - ranks[size] = i; - size++; - } - } - - //Create a new group with the range servers only - if ((ret = MPI_Comm_group(md->mdhim_comm, &orig)) != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while creating a new group in range_server_init_comm", - md->mdhim_rank); - return MDHIM_ERROR; - } - - if ((ret = MPI_Group_incl(orig, size, ranks, &new_group)) != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while creating adding ranks to the new group in range_server_init_comm", - md->mdhim_rank); - return MDHIM_ERROR; - } - - if ((ret = MPI_Comm_create(md->mdhim_comm, 
new_group, &new_comm)) - != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while creating the new communicator in range_server_init_comm", - md->mdhim_rank); - return MDHIM_ERROR; - } - if ((ret = im_range_server(bi)) == 1) { - memcpy(&bi->rs_comm, &new_comm, sizeof(MPI_Comm)); - } else { - MPI_Comm_free(&new_comm); - } - - MPI_Group_free(&orig); - MPI_Group_free(&new_group); - free(ranks); - return MDHIM_SUCCESS; -} - -struct index_t *get_index(struct mdhim_t *md, int index_id) { - struct index_t *index; - - //Acquire the lock to update indexes - while (pthread_rwlock_wrlock(md->indexes_lock) == EBUSY) { - usleep(10); - } - - index = NULL; - if (index_id >= 0) { - HASH_FIND(hh, md->indexes, &index_id, sizeof(int), index); - } - - if (pthread_rwlock_unlock(md->indexes_lock) != 0) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error unlocking the indexes_lock", - md->mdhim_rank); - return NULL; - } - - return index; -} - - -/* - * === FUNCTION ====================================================================== - * Name: get_index_by_name - * Description: Retrieve the index by name - * ===================================================================================== - */ -struct index_t* -get_index_by_name ( struct mdhim_t *md, char *index_name ) -{ - struct index_t *index = NULL; - size_t name_len = strlen(index_name)+1; - char *lower_name = malloc(name_len); - - // Acquire the lock to update indexes - while ( pthread_rwlock_wrlock(md->indexes_lock) == EBUSY ) { - usleep(10); - } - - to_lower(name_len, index_name, lower_name); - - if ( strcmp(lower_name, "") != 0 ) { - HASH_FIND(hh_name, md->indexes_by_name, lower_name, strlen(lower_name), index); - } - - if ( pthread_rwlock_unlock(md->indexes_lock) !=0 ) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error unlocking the indexes_lock", - md->mdhim_rank); - return NULL; - } - -return index; -} /* ----- end of function get_index_by_name ----- */ - -void indexes_release(struct mdhim_t *md) { - struct 
index_t *cur_indx, *tmp_indx; - struct rangesrv_info *cur_rs, *tmp_rs; - int ret; - struct mdhim_stat *stat, *tmp; - - HASH_ITER(hh, md->indexes, cur_indx, tmp_indx) { - HASH_DELETE(hh, md->indexes, cur_indx); - HASH_DELETE(hh_name, md->indexes_by_name, cur_indx); - HASH_ITER(hh, cur_indx->rangesrvs_by_num, cur_rs, tmp_rs) { - HASH_DEL(cur_indx->rangesrvs_by_num, cur_rs); - free(cur_rs); - } - - HASH_ITER(hh, cur_indx->rangesrvs_by_rank, cur_rs, tmp_rs) { - HASH_DEL(cur_indx->rangesrvs_by_rank, cur_rs); - free(cur_rs); - } - - //Clean up the storage if I'm a range server for this index - if (cur_indx->myinfo.rangesrv_num > 0) { - //Write the stats to the database - if ((ret = write_stats(md, cur_indx)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while loading stats", - md->mdhim_rank); - } - - if (cur_indx->myinfo.rangesrv_num == 1) { - //Write the manifest - write_manifest(md, cur_indx); - } - - //Close the database - if ((ret = cur_indx->mdhim_store->close(cur_indx->mdhim_store->db_handle, - cur_indx->mdhim_store->db_stats)) - != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error closing database", - md->mdhim_rank); - } - - pthread_rwlock_destroy(cur_indx->mdhim_store->mdhim_store_stats_lock); - free(cur_indx->mdhim_store->mdhim_store_stats_lock); - if (cur_indx->type != LOCAL_INDEX) { - MPI_Comm_free(&cur_indx->rs_comm); - } - free(cur_indx->mdhim_store); - } - - - //Iterate through the stat hash entries to free them - HASH_ITER(hh, cur_indx->stats, stat, tmp) { - if (!stat) { - continue; - } - - HASH_DEL(cur_indx->stats, stat); - free(stat->max); - free(stat->min); - free(stat); - } - - free(cur_indx); - } -} - -int pack_stats(struct index_t *index, void *buf, int size, - int float_type, int stat_size, MPI_Comm comm) { - - struct mdhim_stat *stat, *tmp; - void *tstat; - struct mdhim_db_istat *istat; - struct mdhim_db_fstat *fstat; - int ret = MPI_SUCCESS; - int sendidx = 0; - - //Pack the stat data I have by iterating 
through the stats hash - HASH_ITER(hh, index->mdhim_store->mdhim_store_stats, stat, tmp) { - //Get the appropriate struct to send - if (float_type) { - fstat = malloc(sizeof(struct mdhim_db_fstat)); - fstat->slice = stat->key; - fstat->num = stat->num; - - if (index->key_type == MDHIM_UNIFYFS_KEY) { -/* printf("bbbefore:min fid is %ld, min offset is %ld, \ - max fid is %ld, max offset is %ld\n", \ - *((ulong *)stat->min), *((ulong *)stat->min+1), \ - *((ulong *)stat->max), *((ulong *)stat->max+1)); - fflush(stdout); -*/ - memcpy((char *)&fstat->dmin, (char *)stat->min, sizeof(ulong)); - memcpy((char *)(&fstat->dmin)+sizeof(ulong), (char *)(stat->min)+sizeof(ulong), \ - sizeof(ulong)); - memcpy((char *)&fstat->dmax, (char *)stat->max, sizeof(ulong)); - memcpy((char *)(&fstat->dmax)+sizeof(ulong), (char *)(stat->max)+sizeof(ulong), \ - sizeof(ulong)); -/* printf("before:min fid is %ld, min offset is %ld, \ - max fid is %ld, max offset is %ld\n", \ - *((ulong *)&(fstat->dmin)), *((ulong *)&(fstat->dmin)+1), \ - *((ulong *)&(fstat->dmax)), *((ulong *)&(fstat->dmax)+1)); - fflush(stdout); -*/ - } - else { - fstat->dmin = *(long double *) stat->min; - fstat->dmax = *(long double *) stat->max; - } - tstat = fstat; - } else { - istat = malloc(sizeof(struct mdhim_db_istat)); - istat->slice = stat->key; - istat->num = stat->num; - istat->imin = *(uint64_t *) stat->min; - istat->imax = *(uint64_t *) stat->max; - tstat = istat; - } - - //Pack the struct - if ((ret = MPI_Pack(tstat, stat_size, MPI_CHAR, buf, size, &sendidx, - comm)) != MPI_SUCCESS) { - mlog(MPI_CRIT, "Error packing buffer when sending stat info" - " to master range server"); - free(buf); - free(tstat); - return ret; - } - - free(tstat); - } - - return ret; -} - -int get_stat_flush_global(struct mdhim_t *md, struct index_t *index) { - char *sendbuf; - int sendsize = 0; - int recvidx = 0; - char *recvbuf; - int *recvcounts; - int *displs; - int recvsize; - int ret = 0; - int i = 0; - int float_type = 0; - struct 
mdhim_stat *stat, *tmp; - void *tstat; - int stat_size = 0; - int master; - int num_items = 0; - - //Determine the size of the buffers to send based on the number and type of stats - if ((ret = is_float_key(index->key_type)) == 1 || \ - index->key_type == MDHIM_UNIFYFS_KEY) { - float_type = 1; - stat_size = sizeof(struct mdhim_db_fstat); - } else { - float_type = 0; - stat_size = sizeof(struct mdhim_db_istat); - } - recvbuf = NULL; - if (index->myinfo.rangesrv_num > 0) { - //Get the number stats in our hash table - if (index->mdhim_store->mdhim_store_stats) { - num_items = HASH_COUNT(index->mdhim_store->mdhim_store_stats); - // printf("num_items is %ld\n", num_items); - // fflush(stdout); - } else { - num_items = 0; - } - ret = is_float_key(index->key_type); - if (ret == 1 || index->key_type == MDHIM_UNIFYFS_KEY) - sendsize = num_items * sizeof(struct mdhim_db_fstat); - else - sendsize = num_items * sizeof(struct mdhim_db_istat); - } - - if (index->myinfo.rangesrv_num > 0) { - //Get the master range server rank according the range server comm - if ((ret = MPI_Comm_size(index->rs_comm, &master)) != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error getting size of comm", - md->mdhim_rank); - } - //The master rank is the last rank in range server comm - master--; - - //First we send the number of items that we are going to send - //Allocate the receive buffer size - recvsize = index->num_rangesrvs * sizeof(int); - recvbuf = malloc(recvsize); - memset(recvbuf, 0, recvsize); - MPI_Barrier(index->rs_comm); - //The master server will receive the number of stats each server has - if ((ret = MPI_Gather(&num_items, 1, MPI_UNSIGNED, recvbuf, 1, - MPI_INT, master, index->rs_comm)) != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - " - "Error while receiving the number of statistics from each range server", - md->mdhim_rank); - free(recvbuf); - goto error; - } - - num_items = 0; - displs = malloc(sizeof(int) * index->num_rangesrvs); - recvcounts = malloc(sizeof(int) * 
index->num_rangesrvs); - for (i = 0; i < index->num_rangesrvs; i++) { - displs[i] = num_items * stat_size; - num_items += ((int *)recvbuf)[i]; - recvcounts[i] = ((int *)recvbuf)[i] * stat_size; - } - - free(recvbuf); - recvbuf = NULL; - - //Allocate send buffer - sendbuf = malloc(sendsize); - - //Pack the stat data I have by iterating through the stats hash table - ret = pack_stats(index, sendbuf, sendsize, - float_type, stat_size, index->rs_comm); - if (ret != MPI_SUCCESS) { - free(recvbuf); - goto error; - } - - //Allocate the recv buffer for the master range server - if (md->mdhim_rank == index->rangesrv_master) { - recvsize = num_items * stat_size; - recvbuf = malloc(recvsize); - memset(recvbuf, 0, recvsize); - } else { - recvbuf = NULL; - recvsize = 0; - } - - MPI_Barrier(index->rs_comm); - //The master server will receive the stat info from each rank in the range server comm - if ((ret = MPI_Gatherv(sendbuf, sendsize, MPI_PACKED, recvbuf, recvcounts, displs, - MPI_PACKED, master, index->rs_comm)) != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - " - "Error while receiving range server info", - md->mdhim_rank); - goto error; - } - free(recvcounts); - free(displs); - free(sendbuf); - } - - MPI_Barrier(md->mdhim_client_comm); - //The master range server broadcasts the number of stats it is going to send - if ((ret = MPI_Bcast(&num_items, 1, MPI_UNSIGNED, index->rangesrv_master, - md->mdhim_comm)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - " - "Error while receiving the number of stats to receive", - md->mdhim_rank); - goto error; - } - MPI_Barrier(md->mdhim_client_comm); - - recvsize = num_items * stat_size; - //Allocate the receive buffer size for clients - if (md->mdhim_rank != index->rangesrv_master) { - recvbuf = malloc(recvsize); - memset(recvbuf, 0, recvsize); - } - - //The master range server broadcasts the receive buffer to the mdhim_comm - if ((ret = MPI_Bcast(recvbuf, recvsize, MPI_PACKED, index->rangesrv_master, - md->mdhim_comm)) 
!= MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error while receiving range server info", - md->mdhim_rank); - goto error; - } - - //Unpack the receive buffer and populate our index->stats hash table - recvidx = 0; - for (i = 0; i < recvsize; i+=stat_size) { - tstat = malloc(stat_size); - memset(tstat, 0, stat_size); - if ((ret = MPI_Unpack(recvbuf, recvsize, &recvidx, tstat, stat_size, - MPI_CHAR, md->mdhim_comm)) != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error while unpacking stat data", - md->mdhim_rank); - free(tstat); - goto error; - } - - stat = malloc(sizeof(struct mdhim_stat)); - stat->dirty = 0; - if (float_type) { - stat->min = (void *) malloc(sizeof(long double)); - stat->max = (void *) malloc(sizeof(long double)); - if (index->key_type == MDHIM_UNIFYFS_KEY) { - struct mdhim_db_fstat * tmp_stat; - tmp_stat = (struct mdhim_db_fstat *)tstat; - memcpy((char *)stat->min, (char *)&(tmp_stat->dmin), sizeof(ulong)); - memcpy((char *)(stat->min)+sizeof(ulong), \ - (char *)&(tmp_stat->dmin)+sizeof(ulong), sizeof(ulong)); - memcpy((char *)stat->max, (char *)&(tmp_stat->dmax), sizeof(ulong)); - memcpy((char *)(stat->max)+sizeof(ulong), \ - (char *)&(tmp_stat->dmax)+sizeof(ulong), sizeof(ulong)); - /* printf("abcd:min fid is %ld, min offset is %ld, \ - max fid is %ld, max offset is %ld, rank is %d\n", \ - *((ulong *)stat->min), *((ulong *)stat->min+1), \ - *((ulong *)stat->max), *((ulong *)stat->max+1), \ - md->mdhim_rank); - fflush(stdout); - */ - } - else { - *(long double *)stat->min = ((struct mdhim_db_fstat *)tstat)->dmin; - *(long double *)stat->max = ((struct mdhim_db_fstat *)tstat)->dmax; - } - stat->key = ((struct mdhim_db_fstat *)tstat)->slice; - // printf("abcd:slice is %ld\n", stat->key); - stat->num = ((struct mdhim_db_fstat *)tstat)->num; - } else { - stat->min = (void *) malloc(sizeof(uint64_t)); - stat->max = (void *) malloc(sizeof(uint64_t)); - *(uint64_t *)stat->min = ((struct mdhim_db_istat *)tstat)->imin; - *(uint64_t *)stat->max = 
((struct mdhim_db_istat *)tstat)->imax; - stat->key = ((struct mdhim_db_istat *)tstat)->slice; - stat->num = ((struct mdhim_db_istat *)tstat)->num; - } - - HASH_FIND_INT(index->stats, &stat->key, tmp); - if (!tmp) { - HASH_ADD_INT(index->stats, key, stat); - } else { - //Replace the existing stat - HASH_REPLACE_INT(index->stats, key, stat, tmp); - free(tmp); - } - free(tstat); - } - free(recvbuf); - return MDHIM_SUCCESS; - -error: - if (recvbuf) { - free(recvbuf); - } - - return MDHIM_ERROR; -} - -int get_stat_flush_local(struct mdhim_t *md, struct index_t *index) { - char *sendbuf; - int sendsize = 0; - int recvidx = 0; - char *recvbuf; - int *num_items_to_recv; - int *recvcounts; - int *displs; - int recvsize; - int ret = 0; - int i = 0, j; - int float_type = 0; - struct mdhim_stat *stat, *tmp, *rank_stat; - void *tstat; - int stat_size = 0; - int num_items = 0; - - //Determine the size of the buffers to send based on the number and type of stats - if ((ret = is_float_key(index->key_type)) == 1) { - float_type = 1; - stat_size = sizeof(struct mdhim_db_fstat); - } else { - float_type = 0; - stat_size = sizeof(struct mdhim_db_istat); - } - - recvbuf = NULL; - if (index->myinfo.rangesrv_num > 0) { - //Get the number stats in our hash table - if (index->mdhim_store->mdhim_store_stats) { - num_items = HASH_COUNT(index->mdhim_store->mdhim_store_stats); - } else { - num_items = 0; - } - if ((ret = is_float_key(index->key_type)) == 1) { - sendsize = num_items * sizeof(struct mdhim_db_fstat); - } else { - sendsize = num_items * sizeof(struct mdhim_db_istat); - } - } - - //First we send the number of items that we are going to send - //Allocate the receive buffer size - recvsize = md->mdhim_comm_size * sizeof(int); - recvbuf = malloc(recvsize); - memset(recvbuf, 0, recvsize); - MPI_Barrier(md->mdhim_client_comm); - //All gather the number of items to send - if ((ret = MPI_Allgather(&num_items, 1, MPI_UNSIGNED, recvbuf, 1, - MPI_INT, md->mdhim_comm)) != MPI_SUCCESS) { - 
mlog(MDHIM_SERVER_CRIT, "Rank: %d - " - "Error while receiving the number of statistics from each range server", - md->mdhim_rank); - free(recvbuf); - goto error; - } - - num_items = 0; - displs = malloc(sizeof(int) * md->mdhim_comm_size); - recvcounts = malloc(sizeof(int) * md->mdhim_comm_size); - for (i = 0; i < md->mdhim_comm_size; i++) { - displs[i] = num_items * stat_size; - num_items += ((int *)recvbuf)[i]; - recvcounts[i] = ((int *)recvbuf)[i] * stat_size; - } - - num_items_to_recv = (int *)recvbuf; - recvbuf = NULL; - - if (sendsize) { - //Allocate send buffer - sendbuf = malloc(sendsize); - - //Pack the stat data I have by iterating through the stats hash table - ret = pack_stats(index, sendbuf, sendsize, - float_type, stat_size, md->mdhim_comm); - if (ret != MPI_SUCCESS) { - free(recvbuf); - goto error; - } - } else { - sendbuf = NULL; - } - - recvsize = num_items * stat_size; - recvbuf = malloc(recvsize); - memset(recvbuf, 0, recvsize); - - MPI_Barrier(md->mdhim_client_comm); - //The master server will receive the stat info from each rank in the range server comm - if ((ret = MPI_Allgatherv(sendbuf, sendsize, MPI_PACKED, recvbuf, recvcounts, displs, - MPI_PACKED, md->mdhim_comm)) != MPI_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - " - "Error while receiving range server info", - md->mdhim_rank); - goto error; - } - - free(recvcounts); - free(displs); - free(sendbuf); - - - MPI_Barrier(md->mdhim_client_comm); - - //Unpack the receive buffer and populate our index->stats hash table - recvidx = 0; - for (i = 0; i < md->mdhim_comm_size; i++) { - if ((ret = is_range_server(md, i, index)) < 1) { - continue; - } - - HASH_FIND_INT(index->stats, &i, tmp); - if (!tmp) { - mlog(MPI_CRIT, "Rank: %d - " - "Adding rank: %d to local index stat data", - md->mdhim_rank, i); - rank_stat = malloc(sizeof(struct mdhim_stat)); - memset(rank_stat, 0, sizeof(struct mdhim_stat)); - rank_stat->key = i; - rank_stat->stats = NULL; - HASH_ADD_INT(index->stats, key, rank_stat); - 
} else { - rank_stat = tmp; - } - - for (j = 0; j < num_items_to_recv[i]; j++) { - tstat = malloc(stat_size); - memset(tstat, 0, stat_size); - if ((ret = MPI_Unpack(recvbuf, recvsize, &recvidx, tstat, stat_size, - MPI_CHAR, md->mdhim_comm)) != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error while unpacking stat data", - md->mdhim_rank); - free(tstat); - goto error; - } - - stat = malloc(sizeof(struct mdhim_stat)); - stat->dirty = 0; - if (float_type) { - stat->min = (void *) malloc(sizeof(long double)); - stat->max = (void *) malloc(sizeof(long double)); - *(long double *)stat->min = ((struct mdhim_db_fstat *)tstat)->dmin; - *(long double *)stat->max = ((struct mdhim_db_fstat *)tstat)->dmax; - stat->key = ((struct mdhim_db_fstat *)tstat)->slice; - stat->num = ((struct mdhim_db_fstat *)tstat)->num; - } else { - stat->min = (void *) malloc(sizeof(uint64_t)); - stat->max = (void *) malloc(sizeof(uint64_t)); - *(uint64_t *)stat->min = ((struct mdhim_db_istat *)tstat)->imin; - *(uint64_t *)stat->max = ((struct mdhim_db_istat *)tstat)->imax; - stat->key = ((struct mdhim_db_istat *)tstat)->slice; - stat->num = ((struct mdhim_db_istat *)tstat)->num; - } - - mlog(MPI_CRIT, "Rank: %d - " - "Adding rank: %d with stat min: %lu, stat max: %lu, stat key: %u num: %lu" - "to local index stat data", - md->mdhim_rank, i, *(uint64_t *)stat->min, *(uint64_t *)stat->max, - stat->key, stat->num); - HASH_FIND_INT(rank_stat->stats, &stat->key, tmp); - if (!tmp) { - HASH_ADD_INT(rank_stat->stats, key, stat); - } else { - //Replace the existing stat - HASH_REPLACE_INT(rank_stat->stats, key, stat, tmp); - free(tmp); - } - - free(tstat); - } - } - - free(recvbuf); - free(num_items_to_recv); - - return MDHIM_SUCCESS; - -error: - if (recvbuf) { - free(recvbuf); - } - - return MDHIM_ERROR; -} - -/** - * get_stat_flush - * Receives stat data from all the range servers and populates md->stats - * - * @param md in main MDHIM struct - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int 
get_stat_flush(struct mdhim_t *md, struct index_t *index) { - int ret; - - pthread_mutex_lock(md->mdhim_comm_lock); - - if (index->type != LOCAL_INDEX) { - ret = get_stat_flush_global(md, index); - } else { - ret = get_stat_flush_local(md, index); - } - - pthread_mutex_unlock(md->mdhim_comm_lock); - - return ret; -} diff --git a/meta/src/indexes.h b/meta/src/indexes.h deleted file mode 100644 index d40c7daa5..000000000 --- a/meta/src/indexes.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __INDEX_H -#define __INDEX_H - -#include "uthash.h" -#include "mdhim_options.h" -#include "mpi.h" - -#define PRIMARY_INDEX 1 -#define SECONDARY_INDEX 2 -#define LOCAL_INDEX 3 -#define REMOTE_INDEX 4 - -struct mdhim_t; // avoid circular #include chain - -typedef struct rangesrv_info rangesrv_info; -/* - * Range server info - * Contains information about each range server - */ -struct rangesrv_info { - //The range server's rank in the mdhim_comm - uint32_t rank; - //The range server's identifier based on rank and number of servers - uint32_t rangesrv_num; - uint32_t num_recs; - void *first_key; - - UT_hash_handle hh; /* makes this structure hashable */ -}; - -/* - * Remote Index info - * Contains information about a remote index - * - * A remote index means that an index can be served by one or more range servers - */ -struct index_t { - int id; // Hash key - char *name; // Secondary Hash key - - //The abstracted data store layer that mdhim uses to store and retrieve records - struct mdhim_store_t *mdhim_store; - //Options for the mdhim data store - int key_type; //The key type used in the db - int db_type; //The database type - int type; /* The type of index - (PRIMARY_INDEX, SECONDARY_INDEX, LOCAL_INDEX) */ - int primary_id; /* The primary index id if this is a secondary index */ - rangesrv_info *rangesrvs_by_num; /* Hash table of the range servers - serving this index. Key is range server number */ - rangesrv_info *rangesrvs_by_rank; /* Hash table of the range servers - serving this index. Key is the rank */ - //Used to determine the number of range servers which is based in - //if myrank % RANGE_SERVER_FACTOR == 0, then myrank is a server - int range_server_factor; - - //Maximum size of a slice. A range server may serve several slices. 
- uint64_t mdhim_max_recs_per_slice; - - //This communicator is for range servers only to talk to each other - MPI_Comm rs_comm; - /* The rank of the range server master that will broadcast stat data to all clients - This rank is the rank in mdhim_comm not in the range server communicator */ - int rangesrv_master; - - //The number of range servers for this index - uint32_t num_rangesrvs; - - //The rank's range server information, if it is a range server for this index - rangesrv_info myinfo; - - //Statistics retrieved from the mdhimStatFlush operation - struct mdhim_stat *stats; - - UT_hash_handle hh; /* makes this structure hashable */ - UT_hash_handle hh_name; /* makes this structure hashable by name */ -}; - -typedef struct index_manifest_t { - int key_type; //The type of key - int index_type; /* The type of index - (PRIMARY_INDEX, SECONDARY_INDEX) */ - int index_id; /* The id of the index in the hash table */ - int primary_id; - char *index_name; /* The name of the index in the hash table */ - int db_type; - uint32_t num_rangesrvs; - int rangesrv_factor; - uint64_t slice_size; - int num_nodes; - int local_server_rank; -} index_manifest_t; - -int update_stat(struct mdhim_t *md, struct index_t *bi, void *key, uint32_t key_len); -int load_stats(struct mdhim_t *md, struct index_t *bi); -int write_stats(struct mdhim_t *md, struct index_t *bi); -int open_db_store(struct mdhim_t *md, struct index_t *index); -uint32_t get_num_range_servers(struct mdhim_t *md, struct index_t *index); -struct index_t *create_local_index(struct mdhim_t *md, int db_type, int key_type, char *index_name); -struct index_t *create_global_index(struct mdhim_t *md, int server_factor, - uint64_t max_recs_per_slice, int db_type, - int key_type, char *index_name); -int get_rangesrvs(struct mdhim_t *md, struct index_t *index); -uint32_t is_range_server(struct mdhim_t *md, int rank, struct index_t *index); -int index_init_comm(struct mdhim_t *md, struct index_t *bi); -int get_stat_flush(struct 
mdhim_t *md, struct index_t *index); -struct index_t *get_index(struct mdhim_t *md, int index_id); -struct index_t *get_index_by_name(struct mdhim_t *md, char *index_name); -void indexes_release(struct mdhim_t *md); -int im_range_server(struct index_t *index); - -#endif diff --git a/meta/src/local_client.c b/meta/src/local_client.c deleted file mode 100644 index 0a5d64a20..000000000 --- a/meta/src/local_client.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#include -#include "mdhim.h" -#include "local_client.h" - -/** - * get_msg_self - * Gets a message from the range server if we are waiting to hear back from ourselves - * This means that the range server is running in the same process as the caller, - * but on a different thread - * - * @param md the main mdhim struct - * @return a pointer to the message received or NULL - */ -static void *get_msg_self(struct mdhim_t *md) { - void *msg; - - //Lock the receive msg mutex - pthread_mutex_lock(md->receive_msg_mutex); - //Wait until there is a message to receive - if (!md->receive_msg) { - pthread_cond_wait(md->receive_msg_ready_cv, md->receive_msg_mutex); - } - - //Get the message - msg = md->receive_msg; - //Set the message queue to null - md->receive_msg = NULL; - //unlock the mutex - pthread_mutex_unlock(md->receive_msg_mutex); - - return msg; -} - -/** - * Send put to range server - * - * @param md main MDHIM struct - * @param pm pointer to put message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *local_client_put(struct mdhim_t *md, struct mdhim_putm_t *pm) { - int ret; - struct mdhim_rm_t *rm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - memset(item, 0, sizeof(work_item)); - item->message = (void *)pm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - rm = (struct mdhim_rm_t *) get_msg_self(md); - // Return response - - return rm; -} - -/** - * Send bulk put to range server - * - * @param md main MDHIM struct - * @param bpm pointer to bulk put message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = 
MDHIM_SUCCESS or MDHIM_ERROR -*/ -struct mdhim_rm_t *local_client_bput(struct mdhim_t *md, struct mdhim_bputm_t *bpm) { - int ret; - struct mdhim_rm_t *brm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)bpm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - brm = (struct mdhim_rm_t *) get_msg_self(md); - - // Return response - return brm; -} - -/** - * Send bulk get to range server - * - * @param md main MDHIM struct - * @param bgm pointer to get message to be sent or inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_bgetrm_t *local_client_bget(struct mdhim_t *md, struct mdhim_bgetm_t *bgm) { - int ret; - struct mdhim_bgetrm_t *rm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)bgm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - rm = (struct mdhim_bgetrm_t *) get_msg_self(md); - - // Return response - return rm; -} - -/** - * Send get with an op and number of records greater than 1 to range server - * - * @param md main MDHIM struct - * @param gm pointer to get message to be inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_bgetrm_t *local_client_bget_op(struct mdhim_t *md, struct mdhim_getm_t *gm) { - int ret; - struct mdhim_bgetrm_t *rm; - work_item *item; - - if ((item = 
malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)gm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - rm = (struct mdhim_bgetrm_t *) get_msg_self(md); - - // Return response - return rm; -} - -/** - * Send commit to range server - * - * @param md main MDHIM struct - * @param cm pointer to put message to be inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *local_client_commit(struct mdhim_t *md, struct mdhim_basem_t *cm) { - int ret; - struct mdhim_rm_t *rm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)cm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - rm = (struct mdhim_rm_t *) get_msg_self(md); - // Return response - - return rm; -} - -/** - * Send delete to range server - * - * @param md main MDHIM struct - * @param dm pointer to delete message to be inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *local_client_delete(struct mdhim_t *md, struct mdhim_delm_t *dm) { - int ret; - struct mdhim_rm_t *rm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)dm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - 
mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - rm = (struct mdhim_rm_t *) get_msg_self(md); - - // Return response - return rm; - -} - -/** - * Send bulk delete to MDHIM - * - * @param md main MDHIM struct - * @param bdm pointer to bulk delete message to be inserted into the range server's work queue - * @return return_message structure with ->error = MDHIM_SUCCESS or MDHIM_ERROR - */ -struct mdhim_rm_t *local_client_bdelete(struct mdhim_t *md, struct mdhim_bdelm_t *bdm) { - int ret; - struct mdhim_rm_t *brm; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return NULL; - } - - item->message = (void *)bdm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return NULL; - } - - brm = (struct mdhim_rm_t *) get_msg_self(md); - - // Return response - return brm; -} - -/** - * Send close to range server - * - * @param md main MDHIM struct - * @param cm pointer to close message to be inserted into the range server's work queue - */ -void local_client_close(struct mdhim_t *md, struct mdhim_basem_t *cm) { - int ret; - work_item *item; - - if ((item = malloc(sizeof(work_item))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "Error while allocating memory for client"); - return; - } - - item->message = (void *)cm; - item->source = md->mdhim_rank; - if ((ret = range_server_add_work(md, item)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error adding work to range server in local_client_put"); - return; - } - - return; -} diff --git a/meta/src/local_client.h b/meta/src/local_client.h deleted file mode 100644 index 42e6c4a12..000000000 --- a/meta/src/local_client.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
- * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __LOCAL_CLIENT_H -#define __LOCAL_CLIENT_H - -#include "messages.h" - -struct mdhim_rm_t *local_client_put(struct mdhim_t *md, struct mdhim_putm_t *pm); -struct mdhim_rm_t *local_client_bput(struct mdhim_t *md, struct mdhim_bputm_t *bpm); -struct mdhim_bgetrm_t *local_client_bget(struct mdhim_t *md, struct mdhim_bgetm_t *bgm); -struct mdhim_bgetrm_t *local_client_bget_op(struct mdhim_t *md, struct mdhim_getm_t *gm); -struct mdhim_rm_t *local_client_commit(struct mdhim_t *md, struct mdhim_basem_t *cm); -struct mdhim_rm_t *local_client_delete(struct mdhim_t *md, struct mdhim_delm_t *dm); -struct mdhim_rm_t *local_client_bdelete(struct mdhim_t *md, struct mdhim_bdelm_t *dm); -void local_client_close(struct mdhim_t *md, struct mdhim_basem_t *cm); - -#endif diff --git a/meta/src/mdhim.c b/meta/src/mdhim.c deleted file mode 100644 index a48e0ee07..000000000 --- a/meta/src/mdhim.c +++ /dev/null @@ -1,998 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. 
- */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include "mdhim.h" -#include "range_server.h" -#include "client.h" -#include "local_client.h" -#include "partitioner.h" -#include "mdhim_options.h" -#include "indexes.h" -#include "mdhim_private.h" - - -/*! \mainpage MDHIM TNG - * - * \section intro_sec Introduction - * - * MDHIM TNG is a key/value store for HPC - * - */ - - -/** - * mdhimInit - * Initializes MDHIM - Collective call - * - * @param appComm the communicator that was passed in from the application (e.g., MPI_COMM_WORLD) - * @param opts Options structure for DB creation, such as name, and primary key type - * @return mdhim_t* that contains info about this instance or NULL if there was an error - */ - -int dbg_rank; -int unlink_cb(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf); -struct mdhim_t *mdhimInit(void *appComm, struct mdhim_options_t *opts) { - int ret = 0; - int flag, provided; - struct mdhim_t *md; - struct index_t *primary_index; - MPI_Comm comm; - - if (!opts) { - //Set default options if no options were passed - opts = mdhim_options_init(); - mdhim_options_set_db_path(opts, "/tmp/hng/"); - mdhim_options_set_db_name(opts, "mdhimDb"); - mdhim_options_set_db_type(opts, LEVELDB); - mdhim_options_set_server_factor(opts, 1); - mdhim_options_set_max_recs_per_slice(opts, 1000); - mdhim_options_set_key_type(opts, MDHIM_BYTE_KEY); - mdhim_options_set_debug_level(opts, MLOG_CRIT); - mdhim_options_set_num_worker_threads(opts, 30); - } - - //Open mlog - stolen from plfs - ret = mlog_open((char *)"mdhim", 0, - opts->debug_level, opts->debug_level, NULL, 0, MLOG_LOGPID, 0); - - //Check if MPI has been initialized - if ((ret = MPI_Initialized(&flag)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM - Error while calling MPI_Initialized"); - exit(1); - } - if (!flag) { - //Initialize MPI with 
multiple thread support since MPI hasn't been initialized - ret = MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM - Error while calling MPI_Init_thread"); - exit(1); - } - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - mlog(MDHIM_CLIENT_WARN, "MDHIM - Error while initializing MPI with threads"); - //exit(1); - } - } - - if (appComm) { - comm = *((MPI_Comm *) appComm); - } else { - comm = MPI_COMM_WORLD; - } - - //Allocate memory for the main MDHIM structure - md = malloc(sizeof(struct mdhim_t)); - memset(md, 0, sizeof(struct mdhim_t)); - if (!md) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM - Error while allocating memory while initializing"); - return NULL; - } - - //Set the options passed or the defaults created - md->db_opts = opts; - - if ((ret = MPI_Comm_dup(comm, &md->mdhim_comm)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error while initializing the MDHIM communicator"); - return NULL; - } - - //Get our rank in the main MDHIM communicator - if ((ret = MPI_Comm_rank(md->mdhim_comm, &md->mdhim_rank)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error getting our rank while initializing MDHIM"); - return NULL; - } - - //Initialize mdhim_comm mutex - md->mdhim_comm_lock = malloc(sizeof(pthread_mutex_t)); - if (!md->mdhim_comm_lock) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for client", - md->mdhim_rank); - return NULL; - } - - if ((ret = pthread_mutex_init(md->mdhim_comm_lock, NULL)) != 0) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while initializing mdhim_comm_lock", md->mdhim_rank); - return NULL; - } - - //Dup the communicator passed in for barriers between clients - if ((ret = MPI_Comm_dup(comm, &md->mdhim_client_comm)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "Error while initializing the MDHIM communicator"); - return NULL; - } - - //Get the size of the main MDHIM communicator - if 
((ret = MPI_Comm_size(md->mdhim_comm, &md->mdhim_comm_size)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error getting the size of the " - "comm while initializing", - md->mdhim_rank); - return NULL; - } - - if ((ret = MPI_Comm_rank(md->mdhim_comm, &dbg_rank)) != MPI_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error getting the rank of the " - "comm while initializing", - md->mdhim_rank); - return NULL; - } - - //Initialize receive msg mutex - used for receiving a message from myself - md->receive_msg_mutex = malloc(sizeof(pthread_mutex_t)); - if (!md->receive_msg_mutex) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for client", - md->mdhim_rank); - return NULL; - } - if ((ret = pthread_mutex_init(md->receive_msg_mutex, NULL)) != 0) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while initializing receive queue mutex", md->mdhim_rank); - return NULL; - } - //Initialize the receive condition variable - used for receiving a message from myself - md->receive_msg_ready_cv = malloc(sizeof(pthread_cond_t)); - if (!md->receive_msg_ready_cv) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for client", - md->mdhim_rank); - return NULL; - } - if ((ret = pthread_cond_init(md->receive_msg_ready_cv, NULL)) != 0) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while initializing client receive condition variable", - md->mdhim_rank); - return NULL; - } - - //Initialize the partitioner - partitioner_init(); - - //Initialize the indexes and create the primary index - md->indexes = NULL; - md->indexes_by_name = NULL; - md->indexes_lock = malloc(sizeof(pthread_rwlock_t)); - if (pthread_rwlock_init(md->indexes_lock, NULL) != 0) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while initializing remote_indexes_lock", - md->mdhim_rank); - return NULL; - } - - //Create the default remote primary index - primary_index = create_global_index(md, opts->rserver_factor, 
opts->max_recs_per_slice, - opts->db_type, opts->db_key_type, NULL); - if (!primary_index) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Couldn't create the default index", - md->mdhim_rank); - return NULL; - } - md->primary_index = primary_index; - - //Set the local receive queue to NULL - used for sending and receiving to/from ourselves - md->receive_msg = NULL; - MPI_Barrier(md->mdhim_client_comm); - - - return md; -} - -/** - * Quits the MDHIM instance - collective call - * - * @param md main MDHIM struct - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int mdhimClose(struct mdhim_t *md) { - int ret; - struct timeval start, end; - - mlog(MDHIM_CLIENT_DBG, "MDHIM Rank %d: Called close", md->mdhim_rank); - gettimeofday(&start, NULL); - MPI_Barrier(md->mdhim_client_comm); - gettimeofday(&end, NULL); -// printf("Took: %lu seconds to complete first close barrier\n", end.tv_sec - start.tv_sec); - - gettimeofday(&start, NULL); - //Stop range server if I'm a range server - if (md->mdhim_rs && (ret = range_server_stop(md)) != MDHIM_SUCCESS) { - return MDHIM_ERROR; - } - - gettimeofday(&end, NULL); -// printf("Took: %lu seconds to stop the range server\n", end.tv_sec - start.tv_sec); - - //Free up memory used by the partitioner - partitioner_release(); - - //Free up memory used by indexes - indexes_release(md); - - //Destroy the receive condition variable - if ((ret = pthread_cond_destroy(md->receive_msg_ready_cv)) != 0) { - return MDHIM_ERROR; - } - free(md->receive_msg_ready_cv); - - //Destroy the receive mutex - if ((ret = pthread_mutex_destroy(md->receive_msg_mutex)) != 0) { - return MDHIM_ERROR; - } - free(md->receive_msg_mutex); - - if ((ret = pthread_rwlock_destroy(md->indexes_lock)) != 0) { - return MDHIM_ERROR; - } - free(md->indexes_lock); - - gettimeofday(&start, NULL); - MPI_Barrier(md->mdhim_client_comm); - //Destroy the client_comm_lock - if ((ret = pthread_mutex_destroy(md->mdhim_comm_lock)) != 0) { - return MDHIM_ERROR; - } - 
gettimeofday(&end, NULL); - free(md->mdhim_comm_lock); -// printf("Took: %lu seconds to complete the second close barrier\n", end.tv_sec - start.tv_sec); - mlog(MDHIM_CLIENT_DBG, "MDHIM Rank %d: Finished close", md->mdhim_rank); - - MPI_Comm_free(&md->mdhim_client_comm); - MPI_Comm_free(&md->mdhim_comm); - free(md); - - //Close MLog - mlog_close(); - - return MDHIM_SUCCESS; -} - -/** - * Commits outstanding MDHIM writes - collective call - * - * @param md main MDHIM struct - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int mdhimCommit(struct mdhim_t *md, struct index_t *index) { - int ret = MDHIM_SUCCESS; - struct mdhim_basem_t *cm; - struct mdhim_rm_t *rm = NULL; - - MPI_Barrier(md->mdhim_client_comm); - //If I'm a range server, send a commit message to myself - if (im_range_server(index)) { - cm = malloc(sizeof(struct mdhim_basem_t)); - cm->mtype = MDHIM_COMMIT; - cm->index = index->id; - cm->index_type = index->type; - rm = local_client_commit(md, cm); - if (!rm || rm->error) { - ret = MDHIM_ERROR; - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while committing database in mdhimCommit", - md->mdhim_rank); - } - - if (rm) { - free(rm); - } - } - - MPI_Barrier(md->mdhim_client_comm); - - return ret; -} - -/** - * Inserts a single record into MDHIM - * - * @param md main MDHIM struct - * @param primary_key pointer to key to store - * @param primary_key_len the length of the key - * @param value pointer to the value to store - * @param value_len the length of the value - * @param secondary_info secondary global and local information for - inserting secondary global and local keys - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimPut(struct mdhim_t *md, - /*Primary key */ - void *primary_key, int primary_key_len, - void *value, int value_len, - /* Optional secondary global and local keys */ - struct secondary_info *secondary_global_info, - struct secondary_info *secondary_local_info) { - int i; - //Return message list - struct 
mdhim_brm_t *head; - void **primary_keys; - int *primary_key_lens; - //Return message from each _put_record casll - struct mdhim_brm_t *brm; - struct mdhim_rm_t *rm; - - brm = NULL; - rm = NULL; - head = NULL; - if (!primary_key || !primary_key_len || - !value || !value_len) { - return NULL; - } - - rm = _put_record(md, md->primary_index, primary_key, primary_key_len, value, value_len); - if (!rm || rm->error) { - return head; - } - - head = _create_brm(rm); - mdhim_full_release_msg(rm); - - //Insert the secondary local key if it was given - if (secondary_local_info && secondary_local_info->secondary_index && - secondary_local_info->secondary_keys && - secondary_local_info->secondary_key_lens && - secondary_local_info->num_keys) { - primary_keys = malloc(sizeof(void *) * secondary_local_info->num_keys); - primary_key_lens = malloc(sizeof(int) * secondary_local_info->num_keys); - for (i = 0; i < secondary_local_info->num_keys; i++) { - primary_keys[i] = primary_key; - primary_key_lens[i] = primary_key_len; - } - - brm = _bput_records(md, secondary_local_info->secondary_index, - secondary_local_info->secondary_keys, - secondary_local_info->secondary_key_lens, - primary_keys, primary_key_lens, - secondary_local_info->num_keys); - - free(primary_keys); - free(primary_key_lens); - if (!brm) { - return head; - } - - _concat_brm(head, brm); - } - - //Insert the secondary global key if it was given - if (secondary_global_info && secondary_global_info->secondary_index && - secondary_global_info->secondary_keys && - secondary_global_info->secondary_key_lens && - secondary_global_info->num_keys) { - primary_keys = malloc(sizeof(void *) * secondary_global_info->num_keys); - primary_key_lens = malloc(sizeof(int) * secondary_global_info->num_keys); - for (i = 0; i < secondary_global_info->num_keys; i++) { - primary_keys[i] = primary_key; - primary_key_lens[i] = primary_key_len; - } - brm = _bput_records(md, secondary_global_info->secondary_index, - 
secondary_global_info->secondary_keys, - secondary_global_info->secondary_key_lens, - primary_keys, primary_key_lens, - secondary_global_info->num_keys); - - free(primary_keys); - free(primary_key_lens); - if (!brm) { - return head; - } - - _concat_brm(head, brm); - } - - return head; -} - -/** - * Inserts a single record into an MDHIM secondary index - * - * @param md main MDHIM struct - * @param secondary_key pointer to key to store - * @param secondary_key_len the length of the key - * @param primary_key pointer to the primary_key - * @param primary_key_len the length of the value - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimPutSecondary(struct mdhim_t *md, - struct index_t *secondary_index, - /*Secondary key */ - void *secondary_key, int secondary_key_len, - /* Primary key */ - void *primary_key, int primary_key_len) { - - //Return message list - struct mdhim_brm_t *head; - - //Return message from each _put_record casll - struct mdhim_rm_t *rm; - - rm = NULL; - head = NULL; - if (!secondary_key || !secondary_key_len || - !primary_key || !primary_key_len) { - return NULL; - } - - rm = _put_record(md, secondary_index, secondary_key, secondary_key_len, - primary_key, primary_key_len); - if (!rm || rm->error) { - return head; - } - - head = _create_brm(rm); - mdhim_full_release_msg(rm); - - return head; -} - -struct mdhim_brm_t *_bput_secondary_keys_from_info(struct mdhim_t *md, - struct secondary_bulk_info *secondary_info, - void **primary_keys, int *primary_key_lens, - int num_records) { - int i, j; - void **primary_keys_to_send; - int *primary_key_lens_to_send; - struct mdhim_brm_t *head, *new; - - head = new = NULL; - for (i = 0; i < num_records; i++) { - primary_keys_to_send = - malloc(secondary_info->num_keys[i] * sizeof(void *)); - primary_key_lens_to_send = - malloc(secondary_info->num_keys[i] * sizeof(int)); - - for (j = 0; j < secondary_info->num_keys[i]; j++) { - primary_keys_to_send[j] = primary_keys[i]; - 
primary_key_lens_to_send[j] = primary_key_lens[i]; - } - - new = _bput_records(md, secondary_info->secondary_index, - secondary_info->secondary_keys[i], - secondary_info->secondary_key_lens[i], - primary_keys_to_send, primary_key_lens_to_send, - secondary_info->num_keys[i]); - if (!head) { - head = new; - } else if (new) { - _concat_brm(head, new); - } - - free(primary_keys_to_send); - free(primary_key_lens_to_send); - } - - return head; -} - -/** - * Inserts multiple records into MDHIM - * - * @param md main MDHIM struct - * @param keys pointer to array of keys to store - * @param key_lens array with lengths of each key in keys - * @param values pointer to array of values to store - * @param value_lens array with lengths of each value - * @param num_records the number of records to store (i.e., the number of keys in keys array) - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimBPut(struct mdhim_t *md, - void **primary_keys, int *primary_key_lens, - void **primary_values, int *primary_value_lens, - int num_records, - struct secondary_bulk_info *secondary_global_info, - struct secondary_bulk_info *secondary_local_info) { - struct mdhim_brm_t *head, *new; - - head = new = NULL; - if (!primary_keys || !primary_key_lens || - !primary_values || !primary_value_lens) { - return NULL; - } - - head = _bput_records(md, md->primary_index, primary_keys, primary_key_lens, - primary_values, primary_value_lens, num_records); - if (!head || head->error) { - return head; - } - - //Insert the secondary local keys if they were given - if (secondary_local_info && secondary_local_info->secondary_index && - secondary_local_info->secondary_keys && - secondary_local_info->secondary_key_lens) { - new = _bput_secondary_keys_from_info(md, secondary_local_info, primary_keys, - primary_key_lens, num_records); - if (new) { - _concat_brm(head, new); - } - } - - //Insert the secondary global keys if they were given - if (secondary_global_info && 
secondary_global_info->secondary_index && - secondary_global_info->secondary_keys && - secondary_global_info->secondary_key_lens) { - new = _bput_secondary_keys_from_info(md, secondary_global_info, primary_keys, - primary_key_lens, num_records); - if (new) { - _concat_brm(head, new); - } - } - - return head; -} - -/** - * Inserts multiple records into an MDHIM secondary index - * - * @param md main MDHIM struct - * @param index the secondary index to use - * @param keys pointer to array of keys to store - * @param key_lens array with lengths of each key in keys - * @param values pointer to array of values to store - * @param value_lens array with lengths of each value - * @param num_records the number of records to store (i.e., the number of keys in keys array) - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimBPutSecondary(struct mdhim_t *md, struct index_t *secondary_index, - void **secondary_keys, int *secondary_key_lens, - void **primary_keys, int *primary_key_lens, - int num_records) { - struct mdhim_brm_t *head, *new; - - head = new = NULL; - if (!secondary_keys || !secondary_key_lens || - !primary_keys || !primary_key_lens) { - return NULL; - } - - head = _bput_records(md, secondary_index, secondary_keys, secondary_key_lens, - primary_keys, primary_key_lens, num_records); - if (!head || head->error) { - return head; - } - - return head; -} - -/** - * Retrieves a single record from MDHIM - * - * @param md main MDHIM struct - * @param key pointer to key to get value of or last key to start from if op is - (MDHIM_GET_NEXT or MDHIM_GET_PREV) - * @param key_len the length of the key - * @param op the operation type - * @return mdhim_getrm_t * or NULL on error - */ -struct mdhim_bgetrm_t *mdhimGet(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - int op) { - - void **keys; - int *key_lens; - struct mdhim_bgetrm_t *bgrm_head; - - if (op != MDHIM_GET_EQ && op != MDHIM_GET_PRIMARY_EQ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM 
Rank: %d - " - "Invalid op specified for mdhimGet", - md->mdhim_rank); - return NULL; - } - - if (!index) { - index = md->primary_index; - } - - //Create an a array with the single key and key len passed in - keys = malloc(sizeof(void *)); - key_lens = malloc(sizeof(int)); - keys[0] = key; - key_lens[0] = key_len; - - //Get the linked list of return messages from mdhimBGet - bgrm_head = _bget_records(md, index, keys, key_lens, 1, 1, op); - - //Clean up - free(keys); - free(key_lens); - - return bgrm_head; -} - -/** - * Retrieves multiple records from MDHIM - * - * @param md main MDHIM struct - * @param keys pointer to array of keys to get values for - * @param key_lens array with lengths of each key in keys - * @param num_records the number of keys to get (i.e., the number of keys in keys array) - * @return mdhim_bgetrm_t * or NULL on error - */ -struct mdhim_bgetrm_t *mdhimBGet(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_keys, int op) { - struct mdhim_bgetrm_t *bgrm_head, *lbgrm; - void **primary_keys; - int *primary_key_lens, plen; - struct index_t *primary_index; - int i; - - if (op != MDHIM_GET_EQ && op != MDHIM_GET_PRIMARY_EQ && op != MDHIM_RANGE_BGET) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Invalid operation for mdhimBGet", - md->mdhim_rank); - return NULL; - } - - //Check to see that we were given a sane amount of records - if (num_keys > MAX_BULK_OPS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Too many bulk operations requested in mdhimBGet", - md->mdhim_rank); - return NULL; - } - - if (!index) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Invalid index specified", - md->mdhim_rank); - return NULL; - } - - bgrm_head = _bget_records(md, index, keys, key_lens, num_keys, 1, op); - if (!bgrm_head) { - return NULL; - } - - if (op == MDHIM_GET_PRIMARY_EQ) { - //Get the number of keys/values we received - plen = 0; - while (bgrm_head) { - for (i = 0; i < bgrm_head->num_keys; i++) - plen++; - bgrm_head 
= bgrm_head->next; - } - - if (plen > MAX_BULK_OPS) { - printf("plen is %d, MAX_BULK_OPS is %d\n", plen, - MAX_BULK_OPS); - fflush(stdout); - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Too many bulk operations would be performed " - "with the MDHIM_GET_PRIMARY_EQ operation. Limiting " - "request to : %u key/values", - md->mdhim_rank, MAX_BULK_OPS); - plen = MAX_BULK_OPS - 1; - } - - primary_keys = malloc(sizeof(void *) * plen); - primary_key_lens = malloc(sizeof(int) * plen); - //Initialize the primary keys array and key lens array - memset(primary_keys, 0, sizeof(void *) * plen); - memset(primary_key_lens, 0, sizeof(int) * plen); - - //Get the primary keys from the previously received messages' values - plen = 0; - while (bgrm_head) { - for (i = 0; i < bgrm_head->num_keys && plen < MAX_BULK_OPS ; i++) { - primary_keys[plen] = malloc(bgrm_head->value_lens[i]); - memcpy(primary_keys[plen], bgrm_head->values[i], - bgrm_head->value_lens[i]); - primary_key_lens[plen] = bgrm_head->value_lens[i]; - plen++; - } - - lbgrm = bgrm_head->next; - mdhim_full_release_msg(bgrm_head); - bgrm_head = lbgrm; - } - - primary_index = get_index(md, index->primary_id); - //Get the primary keys' values - bgrm_head = _bget_records(md, primary_index, - primary_keys, primary_key_lens, - plen, 1, MDHIM_GET_EQ); - - //Free up the primary keys and lens arrays - for (i = 0; i < plen; i++) { - free(primary_keys[i]); - } - - free(primary_keys); - free(primary_key_lens); - } - - //Return the head of the list - return bgrm_head; -} - - -/** - * Retrieves multiple sequential records from a single range server if they exist - * - * If the operation passed in is MDHIM_GET_NEXT or MDHIM_GET_PREV, this return all the records - * starting from the key passed in in the direction specified - * - * If the operation passed in is MDHIM_GET_FIRST and MDHIM_GET_LAST and the key is NULL, - * then this operation will return the keys starting from the first or last key - * - * If the operation passed in is 
MDHIM_GET_FIRST and MDHIM_GET_LAST and the key is not NULL, - * then this operation will return the keys starting the first key on - * the range server that the key resolves to - * - * @param md main MDHIM struct - * @param key pointer to the key to start getting next entries from - * @param key_len the length of the key - * @param num_records the number of successive keys to get - * @param op the operation to perform (i.e., MDHIM_GET_NEXT or MDHIM_GET_PREV) - * @return mdhim_bgetrm_t * or NULL on error - */ -struct mdhim_bgetrm_t *mdhimBGetOp(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - int num_records, int op) -{ - void **keys; - int *key_lens; - struct mdhim_bgetrm_t *bgrm_head; - printf("num_records is is %d, MAX_BULK_OPS is %d\n", num_records, - MAX_BULK_OPS); - fflush(stdout); - - if (num_records > MAX_BULK_OPS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "To many bulk operations requested in %s", - md->mdhim_rank, __func__); - return NULL; - } - - if (op == MDHIM_GET_EQ || op == MDHIM_GET_PRIMARY_EQ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Invalid op specified for mdhimGet", - md->mdhim_rank); - return NULL; - } - - //Create an a array with the single key and key len passed in - keys = malloc(sizeof(void *)); - key_lens = malloc(sizeof(int)); - keys[0] = key; - key_lens[0] = key_len; - - //Get the linked list of return messages from mdhimBGet - bgrm_head = _bget_records(md, index, keys, key_lens, 1, num_records, op); - - //Clean up - free(keys); - free(key_lens); - - return bgrm_head; -} - -struct mdhim_bgetrm_t *mdhimBGetRange(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len) { - struct mdhim_bgetrm_t *bgrm_head; - - //Get the linked list of return messages from mdhimBGet - bgrm_head = _bget_range_records(md, index, start_key, end_key, key_len); - - return bgrm_head; -} - - - -/** - * Deletes a single record from MDHIM - * - * @param md main MDHIM struct - * @param key 
pointer to key to delete - * @param key_len the length of the key - * @return mdhim_rm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimDelete(struct mdhim_t *md, struct index_t *index, - void *key, int key_len) { - struct mdhim_brm_t *brm_head; - void **keys; - int *key_lens; - - keys = malloc(sizeof(void *)); - key_lens = malloc(sizeof(int)); - keys[0] = key; - key_lens[0] = key_len; - - brm_head = _bdel_records(md, index, keys, key_lens, 1); - - free(keys); - free(key_lens); - - return brm_head; -} - -/** - * Deletes multiple records from MDHIM - * - * @param md main MDHIM struct - * @param keys pointer to array of keys to delete - * @param key_lens array with lengths of each key in keys - * @param num_records the number of keys to delete (i.e., the number of keys in keys array) - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *mdhimBDelete(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_records) { - struct mdhim_brm_t *brm_head; - - - //Check to see that we were given a sane amount of records - if (num_records > MAX_BULK_OPS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "To many bulk operations requested in mdhimBGetOp", - md->mdhim_rank); - return NULL; - } - - brm_head = _bdel_records(md, index, keys, key_lens, num_records); - - //Return the head of the list - return brm_head; -} - - -/** - * Retrieves statistics from all the range servers - collective call - * - * @param md main MDHIM struct - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int mdhimStatFlush(struct mdhim_t *md, struct index_t *index) { - int ret; - - MPI_Barrier(md->mdhim_client_comm); - if ((ret = get_stat_flush(md, index)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while getting MDHIM stat data in mdhimStatFlush", - md->mdhim_rank); - } - MPI_Barrier(md->mdhim_client_comm); - - return ret; -} - -/** - * Sets the secondary_info structure used in mdhimPut - * - */ -struct secondary_info 
*mdhimCreateSecondaryInfo(struct index_t *secondary_index, - void **secondary_keys, int *secondary_key_lens, - int num_keys, int info_type) { - struct secondary_info *sinfo; - - - if (!secondary_index || !secondary_keys || - !secondary_key_lens || !num_keys) { - return NULL; - } - - if (info_type != SECONDARY_GLOBAL_INFO && - info_type != SECONDARY_LOCAL_INFO) { - return NULL; - } - - //Initialize the struct - sinfo = malloc(sizeof(struct secondary_info)); - memset(sinfo, 0, sizeof(struct secondary_info)); - - //Set the index fields - sinfo->secondary_index = secondary_index; - sinfo->secondary_keys = secondary_keys; - sinfo->secondary_key_lens = secondary_key_lens; - sinfo->num_keys = num_keys; - sinfo->info_type = info_type; - - return sinfo; -} - -void mdhimReleaseSecondaryInfo(struct secondary_info *si) { - free(si); - - return; -} - -/** - * Sets the secondary_info structure used in mdhimBPut - * - */ -struct secondary_bulk_info *mdhimCreateSecondaryBulkInfo(struct index_t *secondary_index, - void ***secondary_keys, - int **secondary_key_lens, - int *num_keys, int info_type) { - - struct secondary_bulk_info *sinfo; - - if (!secondary_index || !secondary_keys || - !secondary_key_lens || !num_keys) { - return NULL; - } - - if (info_type != SECONDARY_GLOBAL_INFO && - info_type != SECONDARY_LOCAL_INFO) { - return NULL; - } - - //Initialize the struct - sinfo = malloc(sizeof(struct secondary_bulk_info)); - memset(sinfo, 0, sizeof(struct secondary_bulk_info)); - - //Set the index fields - sinfo->secondary_index = secondary_index; - sinfo->secondary_keys = secondary_keys; - sinfo->secondary_key_lens = secondary_key_lens; - sinfo->num_keys = num_keys; - sinfo->info_type = info_type; - - return sinfo; -} - -void mdhimReleaseSecondaryBulkInfo(struct secondary_bulk_info *si) { - free(si); - - return; -} diff --git a/meta/src/mdhim.h b/meta/src/mdhim.h deleted file mode 100644 index 4a5366dd0..000000000 --- a/meta/src/mdhim.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * 
Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __MDHIM_H -#define __MDHIM_H - -#include -#include -#include -#include "data_store.h" -#include "range_server.h" -#include "messages.h" -#include "partitioner.h" -#include "Mlog2/mlog2.h" -#include "Mlog2/mlogfacs2.h" -#include "mdhim_options.h" -#include "indexes.h" -#include "mdhim_private.h" - -#ifdef __cplusplus -extern "C" -{ -#endif -#define MDHIM_SUCCESS 0 -#define MDHIM_ERROR -1 -#define MDHIM_DB_ERROR -2 - -#define SECONDARY_GLOBAL_INFO 1 -#define SECONDARY_LOCAL_INFO 2 - -/* - * mdhim data - * Contains client communicator - * Contains a list of range servers - * Contains a pointer to mdhim_rs_t if rank is a range server - */ -struct mdhim_t { - //This communicator will include every process in the application, but is separate from main the app - //It is used for sending and receiving to and from the range servers - MPI_Comm mdhim_comm; - pthread_mutex_t *mdhim_comm_lock; - - //This communicator will include every process in the application, but is separate from the app - //It is used for barriers for clients - MPI_Comm mdhim_client_comm; - - //The rank in the mdhim_comm - int mdhim_rank; - //The size of mdhim_comm - int mdhim_comm_size; - //Flag to indicate mdhimClose was called - volatile int shutdown; - //A pointer to the primary index - struct index_t *primary_index; - //A linked list of range servers - struct index_t *indexes; - // The hash to hold the indexes by name - struct index_t *indexes_by_name; - - //Lock to allow concurrent readers and a single writer to the remote_indexes hash table - pthread_rwlock_t *indexes_lock; - - //The range server structure which is used only if we are a range server - mdhim_rs_t *mdhim_rs; - //The mutex used if receiving from ourselves - pthread_mutex_t *receive_msg_mutex; - //The condition variable used if receiving from ourselves - pthread_cond_t *receive_msg_ready_cv; - /* The receive msg, which is sent to the client by the - range server running in the same process */ - void *receive_msg; - 
//Options for DB creation - mdhim_options_t *db_opts; -}; - -struct secondary_info { - struct index_t *secondary_index; - void **secondary_keys; - int *secondary_key_lens; - int num_keys; - int info_type; -}; - -struct secondary_bulk_info { - struct index_t *secondary_index; - void ***secondary_keys; - int **secondary_key_lens; - int *num_keys; - int info_type; -}; - -struct mdhim_t *mdhimInit(void *appComm, struct mdhim_options_t *opts); -int mdhimClose(struct mdhim_t *md); -int mdhimCommit(struct mdhim_t *md, struct index_t *index); -int mdhimStatFlush(struct mdhim_t *md, struct index_t *index); -struct mdhim_brm_t *mdhimPut(struct mdhim_t *md, - void *key, int key_len, - void *value, int value_len, - struct secondary_info *secondary_global_info, - struct secondary_info *secondary_local_info); -struct mdhim_brm_t *mdhimPutSecondary(struct mdhim_t *md, - struct index_t *secondary_index, - /*Secondary key */ - void *secondary_key, int secondary_key_len, - /* Primary key */ - void *primary_key, int primary_key_len); -struct mdhim_brm_t *mdhimBPut(struct mdhim_t *md, - void **primary_keys, int *primary_key_lens, - void **primary_values, int *primary_value_lens, - int num_records, - struct secondary_bulk_info *secondary_global_info, - struct secondary_bulk_info *secondary_local_info); -struct mdhim_bgetrm_t *mdhimGet(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - int op); -struct mdhim_bgetrm_t *mdhimBGet(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_records, int op); -struct mdhim_bgetrm_t *mdhimBGetOp(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - int num_records, int op); - -struct mdhim_bgetrm_t *mdhimBGetRange(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len); - -struct mdhim_brm_t *mdhimDelete(struct mdhim_t *md, struct index_t *index, - void *key, int key_len); -struct mdhim_brm_t *mdhimBDelete(struct mdhim_t *md, struct index_t *index, - 
void **keys, int *key_lens, - int num_keys); -void mdhim_release_recv_msg(void *msg); -struct secondary_info *mdhimCreateSecondaryInfo(struct index_t *secondary_index, - void **secondary_keys, int *secondary_key_lens, - int num_keys, int info_type); - -void mdhimReleaseSecondaryInfo(struct secondary_info *si); -struct secondary_bulk_info *mdhimCreateSecondaryBulkInfo(struct index_t *secondary_index, - void ***secondary_keys, - int **secondary_key_lens, - int *num_keys, int info_type); -void mdhimReleaseSecondaryBulkInfo(struct secondary_bulk_info *si); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/meta/src/mdhim_options.c b/meta/src/mdhim_options.c deleted file mode 100644 index dd19e33cb..000000000 --- a/meta/src/mdhim_options.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -/* - * DB usage options. 
- * Location and name of DB, type of DataSotre primary key type, - */ - -#include -#include -#include -#include -#include "mdhim_options.h" - -// Default path to a local path and name, levelDB=2, int_key_type=1, yes_create_new=1 -// and debug=1 (mlog_CRIT) - -#define MANIFEST_FILE_NAME "/mdhim_manifest_" - -struct mdhim_options_t *mdhim_options_init() -{ - struct mdhim_options_t* opts; - opts = malloc(sizeof(struct mdhim_options_t)); - - opts->db_path = "./"; - opts->db_name = "mdhimTstDB-"; - opts->manifest_path = NULL; - opts->db_type = 2; - opts->db_key_type = 1; - opts->db_create_new = 1; - opts->db_value_append = MDHIM_DB_OVERWRITE; - - opts->db_host = "localhost"; - opts->dbs_host = "localhost"; - opts->db_user = "test"; - opts->db_upswd = "pass"; - opts->dbs_user = "test"; - opts->dbs_upswd = "pass"; - - - opts->debug_level = 1; - opts->rserver_factor = 1; - opts->max_recs_per_slice = 100000; - opts->db_paths = NULL; - opts->num_paths = 0; - opts->num_wthreads = 1; - - set_manifest_path(opts, "./"); - return opts; -} - -int check_path_length(mdhim_options_t* opts, char *path) { - int path_len; - int ret = 0; - - path_len = strlen(path) + 1; - if (((!opts->db_name && path_len < PATH_MAX) || - ((path_len + strlen(opts->db_name)) < PATH_MAX)) && - (path_len + strlen(MANIFEST_FILE_NAME)) < PATH_MAX) { - ret = 1; - } else { - printf("Path: %s exceeds: %d bytes, so it won't be used\n", path, PATH_MAX); - } - - return ret; -} - -void set_manifest_path(mdhim_options_t* opts, char *path) { - char *manifest_path; - int path_len = 0; - - if (opts->manifest_path) { - free(opts->manifest_path); - opts->manifest_path = NULL; - } - - path_len = strlen(path) + strlen(MANIFEST_FILE_NAME) + 1; - manifest_path = malloc(path_len); - sprintf(manifest_path, "%s%s", path, MANIFEST_FILE_NAME); - opts->manifest_path = manifest_path; -} - -void mdhim_options_set_login_c(mdhim_options_t* opts, char* db_hl, char *db_ln, char *db_pw, char *dbs_hl, char *dbs_ln, char *dbs_pw){ - 
opts->db_host = db_hl; - opts->db_user = db_ln; - opts->db_upswd = db_pw; - opts->dbs_host = dbs_hl; - opts->dbs_user = dbs_ln; - opts->dbs_upswd = dbs_pw; - -} -void mdhim_options_set_db_path(mdhim_options_t* opts, char *path) -{ - int ret; - - if (!path) { - return; - } - - ret = check_path_length(opts, path); - if (ret) { - opts->db_path = path; - set_manifest_path(opts, path); - } -}; - -void mdhim_options_set_db_paths(struct mdhim_options_t* opts, char **paths, int num_paths) -{ - int i = 0; - int ret; - int verified_paths = -1; - - if (num_paths <= 0) { - return; - } - - opts->db_paths = malloc(sizeof(char *) * num_paths); - for (i = 0; i < num_paths; i++) { - if (!paths[i]) { - continue; - } - - ret = check_path_length(opts, paths[i]); - if (!ret) { - continue; - } - if (!i) { - set_manifest_path(opts, paths[i]); - } - - verified_paths++; - opts->db_paths[verified_paths] = malloc(strlen(paths[i]) + 1); - sprintf(opts->db_paths[verified_paths], "%s", paths[i]); - } - - opts->num_paths = ++verified_paths; -}; - -void mdhim_options_set_db_name(mdhim_options_t* opts, char *name) -{ - opts->db_name = name; -}; - -void mdhim_options_set_db_type(mdhim_options_t* opts, int type) -{ - opts->db_type = type; -}; - -void mdhim_options_set_key_type(mdhim_options_t* opts, int key_type) -{ - opts->db_key_type = key_type; -}; - -void mdhim_options_set_create_new_db(mdhim_options_t* opts, int create_new) -{ - opts->db_create_new = create_new; -}; - -void mdhim_options_set_debug_level(mdhim_options_t* opts, int dbug) -{ - opts->debug_level = dbug; -}; - -void mdhim_options_set_value_append(mdhim_options_t* opts, int append) -{ - opts->db_value_append = append; -}; - -void mdhim_options_set_server_factor(mdhim_options_t* opts, int server_factor) -{ - opts->rserver_factor = server_factor; -}; - -void mdhim_options_set_max_recs_per_slice(mdhim_options_t* opts, uint64_t max_recs_per_slice) -{ - opts->max_recs_per_slice = max_recs_per_slice; -}; - -void 
mdhim_options_set_num_worker_threads(mdhim_options_t* opts, int num_wthreads) -{ - if (num_wthreads > 0) { - opts->num_wthreads = num_wthreads; - } -}; - -void mdhim_options_destroy(mdhim_options_t *opts) { - int i; - - for (i = 0; i < opts->num_paths; i++) { - free(opts->db_paths[i]); - } - - free(opts->manifest_path); - free(opts); -}; diff --git a/meta/src/mdhim_options.h b/meta/src/mdhim_options.h deleted file mode 100644 index 31e7370f1..000000000 --- a/meta/src/mdhim_options.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __OPTIONS_H -#define __OPTIONS_H - -#include - -#ifdef __cplusplus -extern "C" -{ -#endif -/* Append option */ -#define MDHIM_DB_OVERWRITE 0 -#define MDHIM_DB_APPEND 1 - -// Options for the database (used when opening a MDHIM dataStore) -typedef struct mdhim_options_t { - // ------------------- - //Directory location of DBs - char *db_path; - - //Multiple paths of DBs - char **db_paths; - //Number of paths in db_paths - int num_paths; - - char *manifest_path; - - //Name of each DB (will be modified by adding "_" to create multiple - // unique DB for each rank server. - char *db_name; - - //Different types of dataStores - //LEVELDB=1 (from data_store.h) - int db_type; - - //Primary key type - //MDHIM_INT_KEY, MDHIM_LONG_INT_KEY, MDHIM_FLOAT_KEY, MDHIM_DOUBLE_KEY - //MDHIM_STRING_KEY, MDHIM_BYTE_KEY - //(from partitioner.h) - int db_key_type; - - //Force the creation of a new DB (deleting any previous versions if present) - int db_create_new; - - //Whether to append a value to an existing key or overwrite the value - //MDHIM_DB_APPEND to append or MDHIM_DB_OVERWRITE (default) - int db_value_append; - - //DEBUG level - int debug_level; - - //Used to determine the number of range servers which is based in - //if myrank % rserver_factor == 0, then myrank is a server. - // This option is used to set range_server_factor previously a defined var. - int rserver_factor; - - //Maximum size of a slice. A ranger server may server several slices. 
- uint64_t max_recs_per_slice; - - //Number of worker threads per range server - int num_wthreads; - - //Login Credentials - char *db_host; - char *dbs_host; - char *db_user; - char *db_upswd; - char *dbs_user; - char *dbs_upswd; - - -} mdhim_options_t; - -struct mdhim_options_t* mdhim_options_init(); -void mdhim_options_set_db_path(struct mdhim_options_t* opts, char *path); -void mdhim_options_set_db_paths(struct mdhim_options_t* opts, char **paths, int num_paths); -void mdhim_options_set_db_name(struct mdhim_options_t* opts, char *name); -void mdhim_options_set_db_type(struct mdhim_options_t* opts, int type); -void mdhim_options_set_key_type(struct mdhim_options_t* opts, int key_type); -void mdhim_options_set_create_new_db(struct mdhim_options_t* opts, int create_new); -void mdhim_options_set_login_c(struct mdhim_options_t* opts, char* db_hl, char *db_ln, char *db_pw, char *dbs_hl, char *dbs_ln, char *dbs_pw); -void mdhim_options_set_debug_level(struct mdhim_options_t* opts, int dbug); -void mdhim_options_set_value_append(struct mdhim_options_t* opts, int append); -void mdhim_options_set_server_factor(struct mdhim_options_t* opts, int server_factor); -void mdhim_options_set_max_recs_per_slice(struct mdhim_options_t* opts, uint64_t max_recs_per_slice); -void mdhim_options_set_num_worker_threads(struct mdhim_options_t* opts, int num_wthreads); -void set_manifest_path(mdhim_options_t* opts, char *path); -void mdhim_options_destroy(struct mdhim_options_t *opts); -#ifdef __cplusplus -} -#endif -#endif diff --git a/meta/src/mdhim_private.c b/meta/src/mdhim_private.c deleted file mode 100644 index 399ab0e0f..000000000 --- a/meta/src/mdhim_private.c +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. 
- * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include "mdhim.h" -#include "client.h" -#include "local_client.h" -#include "partitioner.h" -#include "indexes.h" -#include -#include - -struct timeval localgetstart, localgetend; -double localgettime=0; - -struct timeval localrangestart, localrangeend; -double localrangetime=0; - -struct timeval localbpmstart, localbpmend; -double localbpmtime=0; -double calrangetime=0; - -struct timeval localcpystart, localcpyend; -double localcpytime = 0; -struct timeval localassignstart, localassignend; -double localassigntime = 0; -struct timeval localgetcpystart, localgetcpyend; -double localgetcpytime = 0; -struct timeval localmallocstart, localmallocend; -double localmalloctime = 0; - -struct timeval confgetstart; -double confgettime=0; -struct mdhim_rm_t *_put_record(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - void *value, int value_len) { - struct mdhim_rm_t *rm = NULL; - rangesrv_list *rl, *rlp; - int ret; - struct mdhim_putm_t *pm; - struct index_t *lookup_index, *put_index; - - put_index = index; - if (index->type == LOCAL_INDEX) { - lookup_index = get_index(md, index->primary_id); - if 
(!lookup_index) { - return NULL; - } - } else { - lookup_index = index; - } - - //Get the range server this key will be sent to - if (put_index->type == LOCAL_INDEX) { - if ((rl = get_range_servers(md, lookup_index, value, value_len)) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBPut", - md->mdhim_rank); - return NULL; - } - } else { - //Get the range server this key will be sent to - if ((rl = get_range_servers(md, lookup_index, key, key_len)) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in _put_record", - md->mdhim_rank); - return NULL; - } - } - - while (rl) { - pm = malloc(sizeof(struct mdhim_putm_t)); - if (!pm) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory in _put_record", - md->mdhim_rank); - return NULL; - } - - //Initialize the put message - pm->basem.mtype = MDHIM_PUT; - pm->key = key; - pm->key_len = key_len; - pm->value = value; - pm->value_len = value_len; - pm->basem.server_rank = rl->ri->rank; - pm->basem.index = put_index->id; - pm->basem.index_type = put_index->type; - - //Test if I'm a range server - ret = im_range_server(put_index); - - //If I'm a range server and I'm the one this key goes to, send the message locally - if (ret && md->mdhim_rank == pm->basem.server_rank) { - rm = local_client_put(md, pm); - } else { - //Send the message through the network as this message is for another rank - rm = client_put(md, pm); - free(pm); - } - - rl = rl->next; - rlp = rl; - free(rlp); - } - - return rm; -} - -/* Creates a linked list of mdhim_rm_t messages */ -struct mdhim_brm_t *_create_brm(struct mdhim_rm_t *rm) { - struct mdhim_brm_t *brm; - - if (!rm) { - return NULL; - } - - brm = malloc(sizeof(struct mdhim_brm_t)); - memset(brm, 0, sizeof(struct mdhim_brm_t)); - brm->error = rm->error; - brm->basem.mtype = rm->basem.mtype; - brm->basem.index = rm->basem.index; - brm->basem.index_type = 
rm->basem.index_type; - brm->basem.server_rank = rm->basem.server_rank; - - return brm; -} - -/* adds new to the list pointed to by head */ -void _concat_brm(struct mdhim_brm_t *head, struct mdhim_brm_t *addition) { - struct mdhim_brm_t *brmp; - - brmp = head; - while (brmp->next) { - brmp = brmp->next; - } - - brmp->next = addition; - - return; -} - -struct mdhim_brm_t *_bput_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - void **values, int *value_lens, - int num_keys) { - struct mdhim_bputm_t **bpm_list, *lbpm; - struct mdhim_bputm_t *bpm; - struct mdhim_brm_t *brm, *brm_head; - struct mdhim_rm_t *rm; - int i; - rangesrv_list *rl, *rlp; - struct index_t *lookup_index, *put_index; - - put_index = index; - if (index->type == LOCAL_INDEX) { - lookup_index = get_index(md, index->primary_id); - if (!lookup_index) { - return NULL; - } - } else { - lookup_index = index; - } - - //Check to see that we were given a sane amount of records - if (num_keys > MAX_BULK_OPS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "To many bulk operations requested in mdhimBGetOp", - md->mdhim_rank); - return NULL; - } - - //The message to be sent to ourselves if necessary - lbpm = NULL; - //Create an array of bulk put messages that holds one bulk message per range server - bpm_list = malloc(sizeof(struct mdhim_bputm_t *) * lookup_index->num_rangesrvs); - - //Initialize the pointers of the list to null - for (i = 0; i < lookup_index->num_rangesrvs; i++) { - bpm_list[i] = NULL; - } - - /* Go through each of the records to find the range server(s) the record belongs to. - If there is not a bulk message in the array for the range server the key belongs to, - then it is created. 
Otherwise, the data is added to the existing message in the array.*/ - gettimeofday(&localcpystart, NULL); - for (i = 0; i < num_keys && i < MAX_BULK_OPS; i++) { - //Get the range server this key will be sent to - if (put_index->type == LOCAL_INDEX) { - if ((rl = get_range_servers(md, lookup_index, values[i], value_lens[i])) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBPut", - md->mdhim_rank); - continue; - } - } else { - gettimeofday(&localrangestart, NULL); - if ((rl = get_range_servers(md, lookup_index, keys[i], key_lens[i])) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBPut", - md->mdhim_rank); - continue; - } - gettimeofday(&localrangeend, NULL); - localrangetime += 1000000 * (localrangeend.tv_sec - \ - localrangestart.tv_sec) + localrangeend.tv_usec - \ - localrangestart.tv_usec; - } - - //There could be more than one range server returned in the case of the local index - while (rl) { - gettimeofday(&localbpmstart, NULL); - if (rl->ri->rank != md->mdhim_rank) { - //Set the message in the list for this range server - bpm = bpm_list[rl->ri->rangesrv_num - 1]; - } else { - //Set the local message - bpm = lbpm; - } - gettimeofday(&localbpmend, NULL); - localbpmtime += 1000000 * (localbpmend.tv_sec - localbpmstart.tv_sec) + \ - localbpmend.tv_usec - localbpmstart.tv_usec; - //If the message doesn't exist, create one - gettimeofday(&localmallocstart, NULL); - if (!bpm) { - bpm = malloc(sizeof(struct mdhim_bputm_t)); - bpm->keys = malloc(sizeof(void *) * MAX_BULK_OPS); - bpm->key_lens = malloc(sizeof(int) * MAX_BULK_OPS); - bpm->values = malloc(sizeof(void *) * MAX_BULK_OPS); - bpm->value_lens = malloc(sizeof(int) * MAX_BULK_OPS); - bpm->num_keys = 0; - bpm->basem.server_rank = rl->ri->rank; - bpm->basem.mtype = MDHIM_BULK_PUT; - bpm->basem.index = put_index->id; - bpm->basem.index_type = put_index->type; - if (rl->ri->rank != md->mdhim_rank) 
{ - bpm_list[rl->ri->rangesrv_num - 1] = bpm; - } else { - lbpm = bpm; - } - } - gettimeofday(&localmallocend, NULL); - localmalloctime += 1000000 * (localmallocend.tv_sec - \ - localmallocstart.tv_sec) + localmallocend.tv_usec - \ - localmallocstart.tv_usec; - - gettimeofday(&localassignstart, NULL); - //Add the key, lengths, and data to the message - bpm->keys[bpm->num_keys] = keys[i]; - bpm->key_lens[bpm->num_keys] = key_lens[i]; - bpm->values[bpm->num_keys] = values[i]; - bpm->value_lens[bpm->num_keys] = value_lens[i]; - bpm->num_keys++; - rlp = rl; - rl = rl->next; - free(rlp); - gettimeofday(&localassignend, NULL); - localassigntime += 1000000 * (localassignend.tv_sec - \ - localassignstart.tv_sec) + localassignend.tv_usec - \ - localassignstart.tv_usec; - } - } - gettimeofday(&localcpyend, NULL); - localcpytime += 1000000 * (localcpyend.tv_sec - localcpystart.tv_sec) + \ - localcpyend.tv_usec - localcpystart.tv_usec; - - //Make a list out of the received messages to return - brm_head = client_bput(md, put_index, bpm_list); - if (lbpm) { - rm = local_client_bput(md, lbpm); - if (rm) { - brm = _create_brm(rm); - brm->next = brm_head; - brm_head = brm; - free(rm); - } - } - - //Free up messages sent - for (i = 0; i < lookup_index->num_rangesrvs; i++) { - if (!bpm_list[i]) { - continue; - } - - free(bpm_list[i]->keys); - free(bpm_list[i]->values); - free(bpm_list[i]->key_lens); - free(bpm_list[i]->value_lens); - free(bpm_list[i]); - } - - free(bpm_list); - - //Return the head of the list - return brm_head; -} - -struct mdhim_bgetrm_t *_bget_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_keys, int num_records, int op) { - struct mdhim_bgetm_t **bgm_list; - struct mdhim_bgetm_t *bgm, *lbgm; - struct mdhim_bgetrm_t *bgrm_head, *lbgrm; - int i; - rangesrv_list *rl = NULL, *rlp; - - //The message to be sent to ourselves if necessary - lbgm = NULL; - //Create an array of bulk get messages that holds one bulk message per 
range server - bgm_list = malloc(sizeof(struct mdhim_bgetm_t *) * index->num_rangesrvs); - //Initialize the pointers of the list to null - for (i = 0; i < index->num_rangesrvs; i++) { - bgm_list[i] = NULL; - } - - /* Go through each of the records to find the range server the record belongs to. - If there is not a bulk message in the array for the range server the key belongs to, - then it is created. Otherwise, the data is added to the existing message in the array.*/ - for (i = 0; i < num_keys && i < MAX_BULK_OPS; i++) { - //Get the range server this key will be sent to - if ((op == MDHIM_GET_EQ || op == MDHIM_GET_PRIMARY_EQ || op == MDHIM_RANGE_BGET) && - index->type != LOCAL_INDEX && - (rl = get_range_servers(md, index, keys[i], key_lens[i])) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBget", - md->mdhim_rank); - free(bgm_list); - return NULL; - } else if ((index->type == LOCAL_INDEX || - (op != MDHIM_GET_EQ && op != MDHIM_GET_PRIMARY_EQ && op != MDHIM_RANGE_BGET)) && - (rl = get_range_servers_from_stats(md, index, keys[i], key_lens[i], op)) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBget", - md->mdhim_rank); - free(bgm_list); - return NULL; - } - - gettimeofday(&localgetcpystart, NULL); - while (rl) { - if (rl->ri->rank != md->mdhim_rank) { - //Set the message in the list for this range server - bgm = bgm_list[rl->ri->rangesrv_num - 1]; - } else { - //Set the local message - bgm = lbgm; - } - - //If the message doesn't exist, create one - if (!bgm) { - bgm = malloc(sizeof(struct mdhim_bgetm_t)); - //bgm->keys = malloc(sizeof(void *) * MAX_BULK_OPS); - //bgm->key_lens = malloc(sizeof(int) * MAX_BULK_OPS); - bgm->keys = malloc(sizeof(void *) * num_keys); - bgm->key_lens = malloc(sizeof(int) * num_keys); - bgm->num_keys = 0; - bgm->num_recs = num_records; - bgm->basem.server_rank = rl->ri->rank; - bgm->basem.mtype = MDHIM_BULK_GET; - 
bgm->op = (op == MDHIM_GET_PRIMARY_EQ) ? MDHIM_GET_EQ : op; - bgm->basem.index = index->id; - bgm->basem.index_type = index->type; - if (rl->ri->rank != md->mdhim_rank) { - bgm_list[rl->ri->rangesrv_num - 1] = bgm; - } else { - lbgm = bgm; - } - } - - //Add the key, lengths, and data to the message - bgm->keys[bgm->num_keys] = keys[i]; - bgm->key_lens[bgm->num_keys] = key_lens[i]; - - bgm->num_keys++; - rlp = rl; - rl = rl->next; - free(rlp); - } - gettimeofday(&localgetcpyend, NULL); - localgetcpytime += 1000000 * (localgetcpyend.tv_sec - \ - localgetcpystart.tv_sec) + localgetcpyend.tv_usec - \ - localgetcpystart.tv_usec; - } - - //Make a list out of the received messages to return - gettimeofday(&localgetstart, NULL); - bgrm_head = client_bget(md, index, bgm_list); - if (lbgm) { - lbgrm = local_client_bget(md, lbgm); - lbgrm->next = bgrm_head; - bgrm_head = lbgrm; - } - gettimeofday(&localgetend, NULL); - localgettime += 1000000*(localgetend.tv_sec-localgetstart.tv_sec)+\ - localgetend.tv_usec-localgetstart.tv_usec; - - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bgm_list[i]) { - continue; - } - - free(bgm_list[i]->keys); - free(bgm_list[i]->key_lens); - free(bgm_list[i]); - } - - free(bgm_list); - - return bgrm_head; -} - -struct mdhim_bgetrm_t *_bget_range_records(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len) { - struct mdhim_bgetm_t **bgm_list; - struct mdhim_bgetm_t *bgm, *lbgm; - struct mdhim_bgetrm_t *bgrm_head, *lbgrm; - int i; - rangesrv_list *rl = NULL, *rlp; - - gettimeofday(&localgetstart, NULL); - //The message to be sent to ourselves if necessary - lbgm = NULL; - //Create an array of bulk get messages that holds one bulk message per range server - bgm_list = malloc(sizeof(struct mdhim_bgetm_t *) * index->num_rangesrvs); - //Initialize the pointers of the list to null - for (i = 0; i < index->num_rangesrvs; i++) { - bgm_list[i] = NULL; - } - - //Get the range server this key will be sent to - if 
((rl = get_range_servers_from_range(md, index, start_key, end_key, key_len)) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBget", - md->mdhim_rank); - free(bgm_list); - return NULL; - } - - gettimeofday(&confgetstart, NULL); - calrangetime+=1000000*(confgetstart.tv_sec-localgetstart.tv_sec)+\ - confgetstart.tv_usec-localgetstart.tv_usec; - while (rl) { - if (rl->ri->rank != md->mdhim_rank) { - //Set the message in the list for this range server - bgm = bgm_list[rl->ri->rangesrv_num - 1]; - } else { - //Set the local message - bgm = lbgm; - } - - //If the message doesn't exist, create one - if (!bgm) { - bgm = malloc(sizeof(struct mdhim_bgetm_t)); - //bgm->keys = malloc(sizeof(void *) * MAX_BULK_OPS); - //bgm->key_lens = malloc(sizeof(int) * MAX_BULK_OPS); - bgm->keys = malloc(sizeof(void *)); - bgm->key_lens = malloc(sizeof(int)); - bgm->num_keys = 1; - bgm->keys[0] = NULL; - bgm->num_recs = 0; - bgm->basem.server_rank = rl->ri->rank; - bgm->basem.mtype = MDHIM_BULK_GET; - bgm->op = MDHIM_GET_NEXT; - bgm->basem.index = index->id; - bgm->basem.index_type = index->type; - if (rl->ri->rank != md->mdhim_rank) { - bgm_list[rl->ri->rangesrv_num - 1] = bgm; - } else { - lbgm = bgm; - } - } - - //Add the key, lengths, and data to the message - if (bgm->keys[0] == NULL) { - bgm->keys[0] = rl->ri->first_key; - bgm->key_lens[0] = key_len; - /* printf("the first key's fid is %ld, offset is %ld, key length is %ld, addr is %x\n", \ - *((long *)bgm->keys[0]), *(((long *)bgm->keys[0])+1), bgm->key_lens[0], bgm->keys[0]); - fflush(stdout); - */ - } - - bgm->num_recs+=rl->ri->num_recs; -// printf("here num_recs is %ld\n", bgm->num_recs); -// fflush(stdout); - rlp = rl; - rl = rl->next; - free(rlp); - } - - gettimeofday(&localgetend, NULL); - confgettime+=1000000*(localgetend.tv_sec-confgetstart.tv_sec)+\ - localgetend.tv_usec-confgetstart.tv_usec; - localgettime+=1000000*(localgetend.tv_sec-localgetstart.tv_sec)+ - 
localgetend.tv_usec-localgetstart.tv_usec; - - - //Make a list out of the received messages to return - bgrm_head = client_bget(md, index, bgm_list); - if (lbgm) { - lbgrm = local_client_bget(md, lbgm); - lbgrm->next = bgrm_head; - bgrm_head = lbgrm; - } - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bgm_list[i]) { - continue; - } - - free(bgm_list[i]->keys); - free(bgm_list[i]->key_lens); - free(bgm_list[i]); - - } - - free(bgm_list); -/* printf("after freeing all these\n"); - fflush(stdout); -*/ - return bgrm_head; -} - -/** - * Deletes multiple records from MDHIM - * - * @param md main MDHIM struct - * @param keys pointer to array of keys to delete - * @param key_lens array with lengths of each key in keys - * @param num_keys the number of keys to delete (i.e., the number of keys in keys array) - * @return mdhim_brm_t * or NULL on error - */ -struct mdhim_brm_t *_bdel_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_keys) { - struct mdhim_bdelm_t **bdm_list; - struct mdhim_bdelm_t *bdm, *lbdm; - struct mdhim_brm_t *brm, *brm_head; - struct mdhim_rm_t *rm; - int i; - rangesrv_list *rl; - - //The message to be sent to ourselves if necessary - lbdm = NULL; - //Create an array of bulk del messages that holds one bulk message per range server - bdm_list = malloc(sizeof(struct mdhim_bdelm_t *) * index->num_rangesrvs); - //Initialize the pointers of the list to null - for (i = 0; i < index->num_rangesrvs; i++) { - bdm_list[i] = NULL; - } - - /* Go through each of the records to find the range server the record belongs to. - If there is not a bulk message in the array for the range server the key belongs to, - then it is created. 
Otherwise, the data is added to the existing message in the array.*/ - for (i = 0; i < num_keys && i < MAX_BULK_OPS; i++) { - //Get the range server this key will be sent to - if (index->type != LOCAL_INDEX && - (rl = get_range_servers(md, index, keys[i], key_lens[i])) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBdel", - md->mdhim_rank); - continue; - } else if (index->type == LOCAL_INDEX && - (rl = get_range_servers_from_stats(md, index, keys[i], - key_lens[i], MDHIM_GET_EQ)) == - NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - " - "Error while determining range server in mdhimBdel", - md->mdhim_rank); - continue; - } - - if (rl->ri->rank != md->mdhim_rank) { - //Set the message in the list for this range server - bdm = bdm_list[rl->ri->rangesrv_num - 1]; - } else { - //Set the local message - bdm = lbdm; - } - - //If the message doesn't exist, create one - if (!bdm) { - bdm = malloc(sizeof(struct mdhim_bdelm_t)); - bdm->keys = malloc(sizeof(void *) * MAX_BULK_OPS); - bdm->key_lens = malloc(sizeof(int) * MAX_BULK_OPS); - bdm->num_keys = 0; - bdm->basem.server_rank = rl->ri->rank; - bdm->basem.mtype = MDHIM_BULK_DEL; - bdm->basem.index = index->id; - bdm->basem.index_type = index->type; - if (rl->ri->rank != md->mdhim_rank) { - bdm_list[rl->ri->rangesrv_num - 1] = bdm; - } else { - lbdm = bdm; - } - } - - //Add the key, lengths, and data to the message - bdm->keys[bdm->num_keys] = keys[i]; - bdm->key_lens[bdm->num_keys] = key_lens[i]; - bdm->num_keys++; - } - - //Make a list out of the received messages to return - brm_head = client_bdelete(md, index, bdm_list); - if (lbdm) { - rm = local_client_bdelete(md, lbdm); - brm = malloc(sizeof(struct mdhim_brm_t)); - brm->error = rm->error; - brm->basem.mtype = rm->basem.mtype; - brm->basem.index = rm->basem.index; - brm->basem.index_type = rm->basem.index_type; - brm->basem.server_rank = rm->basem.server_rank; - brm->next = brm_head; - brm_head = brm; - 
free(rm); - } - - for (i = 0; i < index->num_rangesrvs; i++) { - if (!bdm_list[i]) { - continue; - } - - free(bdm_list[i]->keys); - free(bdm_list[i]->key_lens); - free(bdm_list[i]); - } - - free(bdm_list); - - //Return the head of the list - return brm_head; -} diff --git a/meta/src/mdhim_private.h b/meta/src/mdhim_private.h deleted file mode 100644 index f6177b2a7..000000000 --- a/meta/src/mdhim_private.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#include "mdhim.h" - -struct mdhim_rm_t *_put_record(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, - void *value, int value_len); -struct mdhim_brm_t *_create_brm(struct mdhim_rm_t *rm); -void _concat_brm(struct mdhim_brm_t *head, struct mdhim_brm_t *addition); -struct mdhim_brm_t *_bput_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - void **values, int *value_lens, int num_records); -struct mdhim_bgetrm_t *_bget_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_keys, int num_records, int op); - -struct mdhim_bgetrm_t *_bget_range_records(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len); - -struct mdhim_brm_t *_bdel_records(struct mdhim_t *md, struct index_t *index, - void **keys, int *key_lens, - int num_records); diff --git a/meta/src/messages.c b/meta/src/messages.c deleted file mode 100644 index ce452f50d..000000000 --- a/meta/src/messages.c +++ /dev/null @@ -1,2218 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. 
- * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - - -#include -#include -#include "mdhim.h" -#include "partitioner.h" -#include "messages.h" -#include -#include - -struct timeval recv_comm_start, recv_comm_end; -double recv_comm_time = 0; - -struct timeval packputstart, packputend; -double packputtime = 0; -struct timeval packgetstart, packgetend; -double packgettime = 0; -struct timeval packmpiputstart, packmpiputend; -double packmpiputtime = 0; -struct timeval packretgetstart, packretgetend; -double packretgettime = 0; -struct timeval packretputstart, packretputend; -double packretputtime = 0; - -void test_req_and_wait(struct mdhim_t *md, MPI_Request *req) { - int flag; - MPI_Status status; - int done = 0; - - while (!done) { - pthread_mutex_lock(md->mdhim_comm_lock); - (void) MPI_Test(req, &flag, &status); - //Unlock the mdhim_comm_lock - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (flag) { - done = 1; - } else { - usleep(100); - } - } -} - -/** - * send_rangesrv_work - * Sends a message to the range server at the given destination - * - * @param md main MDHIM struct - * @param dest destination to send to - * @param message pointer to message struct to send - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int send_rangesrv_work(struct mdhim_t *md, int dest, void *message) { - int return_code = MDHIM_ERROR; - void *sendbuf = NULL; - int sendsize = 0; - int mtype; - MPI_Request *req; - - //Pack the work message in into sendbuf and set sendsize - mtype = ((struct mdhim_basem_t *) message)->mtype; - switch(mtype) { - case MDHIM_PUT: - return_code = pack_put_message(md, (struct mdhim_putm_t *)message, &sendbuf, - &sendsize); - break; - case MDHIM_BULK_PUT: - return_code = pack_bput_message(md, (struct mdhim_bputm_t *)message, &sendbuf, - &sendsize); - break; - case 
MDHIM_BULK_GET: - return_code = pack_bget_message(md, (struct mdhim_bgetm_t *)message, &sendbuf, - &sendsize); - break; - case MDHIM_DEL: - return_code = pack_del_message(md, (struct mdhim_delm_t *)message, &sendbuf, - &sendsize); - break; - case MDHIM_BULK_DEL: - return_code = pack_bdel_message(md, (struct mdhim_bdelm_t *)message, &sendbuf, - &sendsize); - break; - case MDHIM_COMMIT: - return_code = pack_base_message(md, (struct mdhim_basem_t *)message, &sendbuf, - &sendsize); - break; - case MDHIM_CLOSE: - return_code = pack_base_message(md, (struct mdhim_basem_t *)message, &sendbuf, - &sendsize); - break; - default: - break; - } - - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: Packing message " - "failed before sending.", md->mdhim_rank); - return MDHIM_ERROR; - } - - req = malloc(sizeof(MPI_Request)); - //Send the size of the message - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(&sendsize, 1, MPI_INT, dest, RANGESRV_WORK_SIZE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - test_req_and_wait(md, req); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending work size message in send_rangesrv_work", - md->mdhim_rank); - free(req); - return MDHIM_ERROR; - } - - //Send the message - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(sendbuf, sendsize, MPI_PACKED, dest, RANGESRV_WORK_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - test_req_and_wait(md, req); - free(req); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending work message in send_rangesrv_work", - md->mdhim_rank); - return MDHIM_ERROR; - } - - free(sendbuf); - return MDHIM_SUCCESS; -} - -/** - * send_all_rangesrv_work - * Sends multiple messages simultaneously and waits for them to all complete - * - * @param md main MDHIM struct - * @param messages double pointer to array of messages to send - * @return 
MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int send_all_rangesrv_work(struct mdhim_t *md, void **messages, int num_srvs) { - int return_code = MDHIM_ERROR; - void *sendbuf = NULL; - void **sendbufs; - int *sizes; - int sendsize = 0; - int mtype; - MPI_Request **reqs, **size_reqs; - MPI_Request *req; - int num_msgs; - int i, ret, flag, done; - void *mesg; - MPI_Status status; - int dest; - - ret = MDHIM_SUCCESS; - num_msgs = 0; - reqs = malloc(sizeof(MPI_Request *) * num_srvs); - size_reqs = malloc(sizeof(MPI_Request *) * num_srvs); - memset(reqs, 0, sizeof(MPI_Request *) * num_srvs); - memset(size_reqs, 0, sizeof(MPI_Request *) * num_srvs); - sendbufs = malloc(sizeof(void *) * num_srvs); - memset(sendbufs, 0, sizeof(void *) * num_srvs); - sizes = malloc(sizeof(int) * num_srvs); - memset(sizes, 0, sizeof(int) * num_srvs); - done = 0; - - //Send all messages at once - for (i = 0; i < num_srvs; i++) { - mesg = *(messages + i); - if (!mesg) { - continue; - } - - mtype = ((struct mdhim_basem_t *) mesg)->mtype; - dest = ((struct mdhim_basem_t *) mesg)->server_rank; - //Pack the work message in into sendbuf and set sendsize - switch(mtype) { - case MDHIM_BULK_PUT: - gettimeofday(&packputstart, NULL); - return_code = pack_bput_message(md, (struct mdhim_bputm_t *)mesg, &sendbuf, - &sendsize); - gettimeofday(&packputend, NULL); - packputtime += 1000000 * (packputend.tv_sec - packputstart.tv_sec)+\ - packputend.tv_usec - packputstart.tv_usec; - break; - case MDHIM_BULK_GET: - gettimeofday(&packgetstart, NULL); - return_code = pack_bget_message(md, (struct mdhim_bgetm_t *)mesg, &sendbuf, - &sendsize); - gettimeofday(&packgetend, NULL); - packgettime+=1000000 * (packgetend.tv_sec - packgetstart.tv_sec) + \ - packgetend.tv_usec - packgetstart.tv_usec; - break; - case MDHIM_BULK_DEL: - return_code = pack_bdel_message(md, (struct mdhim_bdelm_t *)mesg, &sendbuf, - &sendsize); - break; - default: - break; - } - - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM 
Rank: %d - Error: Packing message " - "failed before sending.", md->mdhim_rank); - ret = MDHIM_ERROR; - continue; - } - - sendbufs[num_msgs] = sendbuf; - sizes[num_msgs] = sendsize; - req = malloc(sizeof(MPI_Request)); - size_reqs[num_msgs] = req; - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(&sizes[num_msgs], 1, MPI_INT, dest, RANGESRV_WORK_SIZE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending work message in send_rangesrv_work", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - - req = malloc(sizeof(MPI_Request)); - reqs[num_msgs] = req; - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(sendbuf, sizes[num_msgs], MPI_PACKED, dest, RANGESRV_WORK_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending work message in send_rangesrv_work", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - - num_msgs++; - } - - //Wait for messages to complete - while (done != num_msgs * 2) { - for (i = 0; i < num_msgs; i++) { - req = size_reqs[i]; - if (!req) { - continue; - } - - //Lock the mdhim_comm_lock - pthread_mutex_lock(md->mdhim_comm_lock); - ret = MPI_Test(req, &flag, &status); - //Unlock the mdhim_comm_lock - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (flag) { - free(req); - size_reqs[i] = NULL; - done++; - } - } - for (i = 0; i < num_msgs; i++) { - req = reqs[i]; - if (!req) { - continue; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - ret = MPI_Test(req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (flag) { - free(req); - reqs[i] = NULL; - done++; - } - } - - if (done != num_msgs * 2) { - usleep(100); - } - } - - for (i = 0; i < num_msgs; i++) { - if (!sendbufs[i]) { - continue; - } - free(sendbufs[i]); - } - - free(sendbufs); - free(size_reqs); - free(sizes); - free(reqs); - - return ret; 
-} - -/** - * receive_rangesrv_work message - * Receives a message from the given source - * - * @param md in main MDHIM struct - * @param message out double pointer for message received - * @param src out pointer to source of message received - * @return MDHIM_SUCCESS, MDHIM_CLOSE, MDHIM_COMMIT, or MDHIM_ERROR on error - */ -int receive_rangesrv_work(struct mdhim_t *md, int *src, void **message) { - MPI_Status status; - int return_code; - int msg_size; - int msg_source; - void *recvbuf; - int recvsize; - int mtype; - struct mdhim_basem_t *bm; - int mesg_idx = 0; - MPI_Request *req; - int flag = 0; - int ret = MDHIM_SUCCESS; - - // Receive a message from any client - flag = 0; - - req = malloc(sizeof(MPI_Request)); - pthread_mutex_lock(md->mdhim_comm_lock); - gettimeofday(&recv_comm_start, NULL); - return_code = MPI_Irecv(&recvsize,1, MPI_INT, MPI_ANY_SOURCE, RANGESRV_WORK_SIZE_MSG, - md->mdhim_comm, req); - gettimeofday(&recv_comm_end, NULL); - recv_comm_time += 1000000*(recv_comm_end.tv_sec-recv_comm_start.tv_sec)+recv_comm_end.tv_usec-recv_comm_start.tv_usec; - pthread_mutex_unlock(md->mdhim_comm_lock); - - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: %d " - "receive size message failed.", md->mdhim_rank, return_code); - free(req); - return MDHIM_ERROR; - } - - gettimeofday(&recv_comm_start, NULL); - while (!flag) { - if (md->shutdown) { - free(req); - return MDHIM_ERROR; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Test(req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - usleep(100); - } - gettimeofday(&recv_comm_end, NULL); - recv_comm_time += 1000000*(recv_comm_end.tv_sec-recv_comm_start.tv_sec)+recv_comm_end.tv_usec-recv_comm_start.tv_usec; - - if (return_code == MPI_ERR_IN_STATUS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Received an error status: %d " - " while receiving work message size", 
md->mdhim_rank, status.MPI_ERROR); - } - recvbuf = (void *) malloc(recvsize); - memset(recvbuf, 0, recvsize); - flag = 0; - - pthread_mutex_lock(md->mdhim_comm_lock); - gettimeofday(&recv_comm_start, NULL); - return_code = MPI_Irecv(recvbuf, recvsize, MPI_PACKED, status.MPI_SOURCE, - RANGESRV_WORK_MSG, md->mdhim_comm, req); - gettimeofday(&recv_comm_end, NULL); - recv_comm_time += 1000000*(recv_comm_end.tv_sec-recv_comm_start.tv_sec)+recv_comm_end.tv_usec-recv_comm_start.tv_usec; - pthread_mutex_unlock(md->mdhim_comm_lock); - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: %d " - "receive message failed.", md->mdhim_rank, return_code); - free(recvbuf); - free(req); - return MDHIM_ERROR; - } - gettimeofday(&recv_comm_start, NULL); - while (!flag) { - if (md->shutdown) { - return MDHIM_ERROR; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Test(req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - - usleep(100); - } - gettimeofday(&recv_comm_end, NULL); - recv_comm_time += 1000000*(recv_comm_end.tv_sec-recv_comm_start.tv_sec)+recv_comm_end.tv_usec-recv_comm_start.tv_usec; - - free(req); - if (return_code == MPI_ERR_IN_STATUS) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Received an error status: %d " - " while receiving work message size", md->mdhim_rank, status.MPI_ERROR); - } - if (!recvbuf) { - return MDHIM_ERROR; - } - - msg_source = status.MPI_SOURCE; - *src = msg_source; - *message = NULL; - //Unpack buffer to get the message type - bm = malloc(sizeof(struct mdhim_basem_t)); - return_code = MPI_Unpack(recvbuf, recvsize, &mesg_idx, bm, - sizeof(struct mdhim_basem_t), MPI_CHAR, - md->mdhim_comm); - mtype = bm->mtype; - msg_size = bm->size; - free(bm); - - // Checks for valid message, if error inform and ignore message - if (msg_size==0 || mtypeMDHIM_COMMIT) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Got empty/invalid message 
in receive_rangesrv_work.", - md->mdhim_rank); - free(recvbuf); - return MDHIM_ERROR; - } - switch(mtype) { - case MDHIM_PUT: - return_code = unpack_put_message(md, recvbuf, msg_size, message); - break; - case MDHIM_BULK_PUT: - return_code = unpack_bput_message(md, recvbuf, msg_size, message); - break; - case MDHIM_BULK_GET: - return_code = unpack_bget_message(md, recvbuf, msg_size, message); - break; - case MDHIM_DEL: - return_code = unpack_del_message(md, recvbuf, msg_size, message); - break; - case MDHIM_BULK_DEL: - return_code = unpack_bdel_message(md, recvbuf, msg_size, message); - break; - case MDHIM_COMMIT: - ret = MDHIM_COMMIT; - break; - case MDHIM_CLOSE: - ret = MDHIM_CLOSE; - break; - default: - break; - } - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error unpacking message in receive_rangesrv_work", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - - free(recvbuf); - - return ret; -} - -/** - * send_client_response - * Sends a message to a client - * - * @param md main MDHIM struct - * @param dest destination to send to - * @param message pointer to message to send - * @param sendbuf double pointer to packed message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int send_client_response(struct mdhim_t *md, int dest, void *message, int *sizebuf, - void **sendbuf, MPI_Request **size_req, MPI_Request **msg_req) { - int return_code = 0; - int mtype; - int ret = MDHIM_SUCCESS; - - *size_req = NULL; - *msg_req = NULL; - *sendbuf = NULL; - //Pack the client response in the message pointer into sendbuf and set sendsize - mtype = ((struct mdhim_basem_t *) message)->mtype; - switch(mtype) { - case MDHIM_RECV: - gettimeofday(&packretputstart, NULL); - return_code = pack_return_message(md, (struct mdhim_rm_t *)message, sendbuf, - sizebuf); - gettimeofday(&packretputend, NULL); - packretputtime+=1000000 * (packretputend.tv_sec - \ - packretputstart.tv_sec) + packretputend.tv_usec - \ - packretputstart.tv_usec; - break; - case 
MDHIM_RECV_BULK_GET: - gettimeofday(&packretgetstart, NULL); - return_code = pack_bgetrm_message(md, (struct mdhim_bgetrm_t *)message, sendbuf, - sizebuf); - gettimeofday(&packretgetend, NULL); - packretgettime+=1000000 * (packretgetend.tv_sec - \ - packretgetstart.tv_sec) + packretgetend.tv_usec - \ - packretgetstart.tv_usec; - break; - default: - break; - } - - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the message while sending.", md->mdhim_rank); - ret = MDHIM_ERROR; - } - - //Send the size message - *size_req = malloc(sizeof(MPI_Request)); - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(sizebuf, 1, MPI_INT, dest, CLIENT_RESPONSE_SIZE_MSG, - md->mdhim_comm, *size_req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending client response message size in send_client_response", - md->mdhim_rank); - ret = MDHIM_ERROR; - free(*size_req); - *size_req = NULL; - } - - *msg_req = malloc(sizeof(MPI_Request)); - //Send the actual message - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Isend(*sendbuf, *sizebuf, MPI_PACKED, dest, CLIENT_RESPONSE_MSG, - md->mdhim_comm, *msg_req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code != MPI_SUCCESS) { - mlog(MPI_CRIT, "Rank: %d - " - "Error sending client response message in send_client_response", - md->mdhim_rank); - ret = MDHIM_ERROR; - free(*msg_req); - *msg_req = NULL; - } - - return ret; -} - - -/** - * receive_client_response message - * Receives a message from the given source - * - * @param md in main MDHIM struct - * @param src in source to receive from - * @param message out double pointer for message received - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int receive_client_response(struct mdhim_t *md, int src, void **message) { - int return_code; - int msg_size; - int mtype; - int mesg_idx = 0; - void *recvbuf; - 
struct mdhim_basem_t *bm; - MPI_Request *req; - - req = malloc(sizeof(MPI_Request)); - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Irecv(&msg_size, 1, MPI_INT, src, CLIENT_RESPONSE_SIZE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - test_req_and_wait(md, req); - - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MPI_CRIT, "Rank: %d - " - "Error receiving message in receive_client_response", - md->mdhim_rank); - free(req); - return MDHIM_ERROR; - } - - recvbuf = malloc(msg_size); - memset(recvbuf, 0, msg_size); - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Irecv(recvbuf, msg_size, MPI_PACKED, src, CLIENT_RESPONSE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - test_req_and_wait(md, req); - free(req); - - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MPI_CRIT, "Rank: %d - " - "Error receiving message in receive_client_response", - md->mdhim_rank); - return MDHIM_ERROR; - } - - //Received the message - *message = NULL; - bm = malloc(sizeof(struct mdhim_basem_t)); - //Unpack buffer to get the message type - return_code = MPI_Unpack(recvbuf, msg_size, &mesg_idx, bm, - sizeof(struct mdhim_basem_t), MPI_CHAR, - md->mdhim_comm); - mtype = bm->mtype; - msg_size = bm->size; - free(bm); - switch(mtype) { - case MDHIM_RECV: - return_code = unpack_return_message(md, recvbuf, message); - break; - case MDHIM_RECV_BULK_GET: - return_code = unpack_bgetrm_message(md, recvbuf, msg_size, message); - break; - default: - break; - } - - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the message while receiving from client.", md->mdhim_rank); - return MDHIM_ERROR; - } - - free(recvbuf); - - return MDHIM_SUCCESS; -} - -/** - * receive_all_client_responses - * Receives messages from multiple sources sources - * - 
* @param md in main MDHIM struct - * @param srcs in sources to receive from - * @param nsrcs in number of sources to receive from - * @param messages out array of messages to receive - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int receive_all_client_responses(struct mdhim_t *md, int *srcs, int nsrcs, - void ***messages) { - MPI_Status status; - int return_code; - int mtype; - int mesg_idx = 0; - void *recvbuf, **recvbufs; - int *sizebuf; - struct mdhim_basem_t *bm; - int i; - int ret = MDHIM_SUCCESS; - MPI_Request **reqs, *req; - int done = 0; - int flag = 0; - int msg_size; - - sizebuf = malloc(sizeof(int) * nsrcs); - memset(sizebuf, 0, sizeof(int) * nsrcs); - reqs = malloc(nsrcs * sizeof(MPI_Request *)); - memset(reqs, 0, nsrcs * sizeof(MPI_Request *)); - recvbufs = malloc(nsrcs * sizeof(void *)); - memset(recvbufs, 0, nsrcs * sizeof(void *)); - done = 0; - for (i = 0; i < nsrcs; i++) { - // Receive a size message from the servers in the list - req = malloc(sizeof(MPI_Request)); - reqs[i] = req; - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Irecv(&sizebuf[i], 1, MPI_INT, - srcs[i], CLIENT_RESPONSE_SIZE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MPI_CRIT, "Rank: %d - " - "Error receiving message in receive_client_response", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - } - - //Wait for size messages to complete - while (done != nsrcs) { - for (i = 0; i < nsrcs; i++) { - req = reqs[i]; - if (!req) { - continue; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Test(req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code == MPI_ERR_REQUEST) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Received an error status: %d " - " while receiving client response message size", - md->mdhim_rank, status.MPI_ERROR); - } - if (!flag) { - continue; - } - 
free(req); - reqs[i] = NULL; - done++; - } - - if (done != nsrcs) { - usleep(100); - } - } - - done = 0; - for (i = 0; i < nsrcs; i++) { - // Receive a message from the servers in the list - recvbuf = malloc(sizebuf[i]); - recvbufs[i] = recvbuf; - req = malloc(sizeof(MPI_Request)); - reqs[i] = req; - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Irecv(recvbuf, sizebuf[i], MPI_PACKED, - srcs[i], CLIENT_RESPONSE_MSG, - md->mdhim_comm, req); - pthread_mutex_unlock(md->mdhim_comm_lock); - - // If the receive did not succeed then return the error code back - if ( return_code != MPI_SUCCESS ) { - mlog(MPI_CRIT, "Rank: %d - " - "Error receiving message in receive_client_response", - md->mdhim_rank); - ret = MDHIM_ERROR; - } - } - - //Wait for messages to complete - while (done != nsrcs) { - for (i = 0; i < nsrcs; i++) { - req = reqs[i]; - if (!req) { - continue; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - return_code = MPI_Test(req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (return_code == MPI_ERR_REQUEST) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Received an error status: %d " - " while receiving work message size", md->mdhim_rank, status.MPI_ERROR); - } - if (!flag) { - continue; - } - free(req); - reqs[i] = NULL; - done++; - } - - if (done != nsrcs) { - usleep(100); - } - } - - free(reqs); - for (i = 0; i < nsrcs; i++) { - recvbuf = recvbufs[i]; - //Received the message - *(*messages + i) = NULL; - bm = malloc(sizeof(struct mdhim_basem_t)); - //Unpack buffer to get the message type - mesg_idx = 0; - return_code = MPI_Unpack(recvbuf, sizebuf[i], &mesg_idx, bm, - sizeof(struct mdhim_basem_t), MPI_CHAR, - md->mdhim_comm); - mtype = bm->mtype; - msg_size = bm->size; - free(bm); - switch(mtype) { - case MDHIM_RECV: - return_code = unpack_return_message(md, recvbuf, - (*messages + i)); - break; - case MDHIM_RECV_BULK_GET: - return_code = unpack_bgetrm_message(md, recvbuf, msg_size, - (*messages + i)); - break; - default: 
- break; - } - - if (return_code != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the message while receiving from client.", md->mdhim_rank); - ret = MDHIM_ERROR; - } - - free(recvbuf); - } - - free(recvbufs); - free(sizebuf); - - return ret; -} - -///------------------------ - -/** - * pack_put_message - * Packs a put message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param pm in structure put_message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer to size of sendbuf - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_putm_t { - int mtype; - void *key; - int key_len; - void *data; - int data_len; - int server_rank; - }; -*/ -int pack_put_message(struct mdhim_t *md, struct mdhim_putm_t *pm, void **sendbuf, int *sendsize) { - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_putm_t); // Generous variable for size calculation - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; // Variable for incremental pack - void *outbuf; - - // Add to size the length of the key and data fields - m_size += pm->key_len + pm->value_len; - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: put message too large." - " Put is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - - //Set output variable for the size to send - mesg_size = (int) m_size; - *sendsize = mesg_size; - pm->basem.size = mesg_size; - - // Is the computed message size of a safe value? (less than a max message size?) 
- if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - outbuf = *sendbuf; - // pack the message first with the structure and then followed by key and data values. - return_code = MPI_Pack(pm, sizeof(struct mdhim_putm_t), MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(pm->key, pm->key_len, MPI_CHAR, outbuf, mesg_size, &mesg_idx, - md->mdhim_comm); - return_code += MPI_Pack(pm->value, pm->value_len, MPI_CHAR, outbuf, mesg_size, &mesg_idx, - md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * pack_bput_message - * Packs a bulk put message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param bpm in structure bput_message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for packed message size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bputm_t { - int mtype; - void **keys; - int *key_lens; - void **data; - int *data_lens; - int num_records; - int server_rank; - }; -*/ -int pack_bput_message(struct mdhim_t *md, struct mdhim_bputm_t *bpm, void **sendbuf, int *sendsize) { - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_bputm_t); // Generous variable for size calc - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; - int i; - - // Add the sizes of the length arrays (key_lens and data_lens) - m_size += 2 * bpm->num_keys * sizeof(int); - - // For the each of the keys and data add 
enough chars. - for (i=0; i < bpm->num_keys; i++) - m_size += bpm->key_lens[i] + bpm->value_lens[i]; - - // Is the computed message size of a safe value? (less than a max message size?) - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: bulk put message too large." - " Bput is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; // Safe size to use in MPI_pack - *sendsize = mesg_size; - bpm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack bulk put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // pack the message first with the structure and then followed by key and data values (plus lengths). - return_code = MPI_Pack(bpm, sizeof(struct mdhim_bputm_t), MPI_CHAR, *sendbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - - // For the each of the keys and data pack the chars plus two ints for key_len and data_len. 
- gettimeofday(&packmpiputstart, NULL); - for (i=0; i < bpm->num_keys; i++) { - return_code += MPI_Pack(&bpm->key_lens[i], 1, MPI_INT, - *sendbuf, mesg_size, &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(bpm->keys[i], bpm->key_lens[i], MPI_CHAR, - *sendbuf, mesg_size, &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(&bpm->value_lens[i], 1, MPI_INT, - *sendbuf, mesg_size, &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(bpm->values[i], bpm->value_lens[i], MPI_CHAR, - *sendbuf, mesg_size, &mesg_idx, md->mdhim_comm); - } - gettimeofday(&packmpiputend, NULL); - packmpiputtime += 1000000 * - (packmpiputend.tv_sec - packmpiputstart.tv_sec) + - packmpiputend.tv_usec - packmpiputstart.tv_usec; - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the bulk put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_put_message - * Unpacks a put message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message we received - * @param mesg_size in size of the incoming message - * @param putm out put message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_putm_t { - int mtype; - void *key; - int key_len; - void *data; - int data_len; - int server_rank; - }; -*/ -int unpack_put_message(struct mdhim_t *md, void *message, int mesg_size, void **putm) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - struct mdhim_putm_t *pm; - - if ((*((struct mdhim_putm_t **) putm) = malloc(sizeof(struct mdhim_putm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - pm = *((struct 
mdhim_putm_t **) putm); - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, pm, - sizeof(struct mdhim_putm_t), MPI_CHAR, - md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - if ((pm->key = malloc(pm->key_len * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, pm->key, pm->key_len, - MPI_CHAR, md->mdhim_comm); - - // Unpack data by first allocating memory and then extracting the values from message - if ((pm->value = malloc(pm->value_len * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, pm->value, pm->value_len, - MPI_CHAR, md->mdhim_comm); - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the put message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_bput_message - * Unpacks a bulk put message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message we received - * @param mesg_size in size of the incoming message - * @param bput out bulk put message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bputm_t { - int mtype; - void **keys; - int *key_lens; - void **data; - int *data_lens; - int num_records; - int server_rank; - }; -*/ -int unpack_bput_message(struct mdhim_t *md, void *message, int mesg_size, void **bput) { - 
- int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - int i; - int num_records; - - if ((*((struct mdhim_bputm_t **) bput) = malloc(sizeof(struct mdhim_bputm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, *((struct mdhim_bputm_t **) bput), - sizeof(struct mdhim_bputm_t), - MPI_CHAR, md->mdhim_comm); - - num_records = (*((struct mdhim_bputm_t **) bput))->num_keys; - // Allocate memory for key pointers, to be populated later. - if (((*((struct mdhim_bputm_t **) bput))->keys = - malloc(num_records * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Allocate memory for value pointers, to be populated later. - if (((*((struct mdhim_bputm_t **) bput))->values = - malloc(num_records * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Allocate memory for key_lens, to be populated later. - if (((*((struct mdhim_bputm_t **) bput))->key_lens = - (int *)malloc(num_records * sizeof(int))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Allocate memory for value_lens, to be populated later. 
- if (((*((struct mdhim_bputm_t **) bput))->value_lens = - (int *)malloc(num_records * sizeof(int))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // For the each of the keys and data unpack the chars plus two ints for key_lens[i] and data_lens[i]. - for (i=0; i < num_records; i++) { - // Unpack the key_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - &(*((struct mdhim_bputm_t **) bput))->key_lens[i], 1, MPI_INT, - md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - if (((*((struct mdhim_bputm_t **) bput))->keys[i] = - malloc((*((struct mdhim_bputm_t **) bput))->key_lens[i] * - sizeof(char))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_bputm_t **) bput))->keys[i], - (*((struct mdhim_bputm_t **) bput))->key_lens[i], - MPI_CHAR, md->mdhim_comm); - - // Unpack the data_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - &(*((struct mdhim_bputm_t **) bput))->value_lens[i], 1, - MPI_INT, - md->mdhim_comm); - - // Unpack data by first allocating memory and then extracting the values from message - if (((*((struct mdhim_bputm_t **) bput))->values[i] = - malloc((*((struct mdhim_bputm_t **) bput))->value_lens[i] * - sizeof(char))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_bputm_t **) bput))->values[i], - (*((struct mdhim_bputm_t **) bput))->value_lens[i], - MPI_CHAR, md->mdhim_comm); - } - - // If the unpack did not succeed then log the error and return the error code - if ( 
return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the bput message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -///------------------------ - -/** - * pack_get_message - * Packs a get message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param gm in structure get_message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer to sendbuf's size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_getm_t { - int mtype; - int op; - void *key; - int key_len; - int server_rank; - }; -*/ -int pack_get_message(struct mdhim_t *md, struct mdhim_getm_t *gm, void **sendbuf, int *sendsize) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_getm_t); // Generous variable for size calculation - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; // Variable for incremental pack - void *outbuf; - - // Add to size the length of the key and data fields - m_size += gm->key_len; - - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: get message too large." - " Get is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; - *sendsize = mesg_size; - gm->basem.size = mesg_size; - - // Is the computed message size of a safe value? (less than a max message size?) - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - outbuf = *sendbuf; - // pack the message first with the structure and then followed by key and data values. 
- return_code = MPI_Pack(gm, sizeof(struct mdhim_getm_t), MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(gm->key, gm->key_len, MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * pack_bget_message - * Packs a bget message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param bgm in structure bget_message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for sendbuf size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bgetm_t { - int mtype; - int op; - void **keys; - int *key_lens; - int num_records; - int server_rank; - }; -*/ -int pack_bget_message(struct mdhim_t *md, struct mdhim_bgetm_t *bgm, void **sendbuf, int *sendsize) { - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_bgetm_t); // Generous variable for size calc - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; - int i; - - // Calculate the size of the message to send - m_size += bgm->num_keys * sizeof(int) * 2; - // For the each of the keys add the size to the length - for (i=0; i < bgm->num_keys; i++) - m_size += bgm->key_lens[i]; - - // Is the computed message size of a safe value? (less than a max message size?) - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: bulk get message too large." 
- " Bget is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; // Safe size to use in MPI_pack - *sendsize = mesg_size; - bgm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack bulk get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // pack the message first with the structure and then followed by key and - // data values (plus lengths). - return_code = MPI_Pack(bgm, sizeof(struct mdhim_bgetm_t), MPI_CHAR, - *sendbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - // For the each of the keys and data pack the chars plus one int for key_len. - for (i=0; i < bgm->num_keys; i++) { - return_code += MPI_Pack(&bgm->key_lens[i], 1, MPI_INT, - *sendbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(bgm->keys[i], bgm->key_lens[i], MPI_CHAR, - *sendbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - - } - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the bulk get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_get_message - * Unpacks a get message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message we received - * @param mesg_size in size of the incoming message - * @param getm out get message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_getm_t { - int mtype; - //Operation type e.g., MDHIM_GET_VAL, MDHIM_GET_NEXT, MDHIM_GET_PREV - int op; - void *key; - int key_len; - int server_rank; - }; -*/ -int unpack_get_message(struct mdhim_t *md, void *message, int mesg_size, void **getm) { - int return_code = MPI_SUCCESS; 
// MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - - if ((*((struct mdhim_getm_t **) getm) = malloc(sizeof(struct mdhim_getm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, *((struct mdhim_getm_t **) getm), - sizeof(struct mdhim_getm_t), MPI_CHAR, md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - if (((*((struct mdhim_getm_t **) getm))->key = - malloc((*((struct mdhim_getm_t **) getm))->key_len * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_getm_t **) getm))->key, - (*((struct mdhim_getm_t **) getm))->key_len, - MPI_CHAR, md->mdhim_comm); - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the get message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_bget_message - * Unpacks a bulk get message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message we received - * @param mesg_size in size of the incoming message - * @param bgetm out bulk get message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bgetm_t { - int mtype; - int op; - void **keys; - int *key_lens; - int num_records; - int server_rank; - }; -*/ -int unpack_bget_message(struct mdhim_t *md, void *message, int mesg_size, void 
**bgetm) { - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - int i; - int num_records; - - if ((*((struct mdhim_bgetm_t **) bgetm) = malloc(sizeof(struct mdhim_bgetm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, *((struct mdhim_bgetm_t **) bgetm), - sizeof(struct mdhim_bgetm_t), - MPI_CHAR, md->mdhim_comm); - - num_records = (*((struct mdhim_bgetm_t **) bgetm))->num_keys; - // Allocate memory for key pointers, to be populated later. - if (((*((struct mdhim_bgetm_t **) bgetm))->keys = - malloc(num_records * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget message.", md->mdhim_rank); - return MDHIM_ERROR; - } - // Allocate memory for key_lens, to be populated later. - if (((*((struct mdhim_bgetm_t **) bgetm))->key_lens = - (int *)malloc(num_records * sizeof(int))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - memset((*((struct mdhim_bgetm_t **) bgetm))->key_lens, 0, num_records * sizeof(int)); - // For the each of the keys and data unpack the chars plus an int for key_lens[i]. 
- for (i=0; i < num_records; i++) { - // Unpack the key_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - &(*((struct mdhim_bgetm_t **) bgetm))->key_lens[i], - 1, MPI_INT, md->mdhim_comm); - // Unpack key by first allocating memory and then extracting the values from message - if (((*((struct mdhim_bgetm_t **) bgetm))->keys[i] = - malloc((*((struct mdhim_bgetm_t **) bgetm))->key_lens[i] * - sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_bgetm_t **) bgetm))->keys[i], - (*((struct mdhim_bgetm_t **) bgetm))->key_lens[i], - MPI_CHAR, md->mdhim_comm); - } - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the bget message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * pack_bgetrm_message - * Packs a bulk get return message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param bgrm in structure bget_return_message which will be packed into the message - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer to sendbuf's size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bgetrm_t { - int mtype; - int error; - void **keys; - int *key_lens; - void **values; - int *value_lens; - int num_records; - }; -*/ -int pack_bgetrm_message(struct mdhim_t *md, struct mdhim_bgetrm_t *bgrm, void **sendbuf, int *sendsize) { - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_bgetrm_t); // Generous variable for size calc - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; - int i; - void 
*outbuf; - - // For each of the lens (key_lens and data_lens) - // WARNING We are treating ints as the same size as char for packing purposes - m_size += 2 * bgrm->num_keys * sizeof(int); - - // For the each of the keys and data add enough chars. - for (i=0; i < bgrm->num_keys; i++) - m_size += bgrm->key_lens[i] + bgrm->value_lens[i]; - - // Is the computed message size of a safe value? (less than a max message size?) - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: bulk get return message too large." - " Bget return message is over Maximum size allowed of %d.", md->mdhim_rank, - MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; // Safe size to use in MPI_pack - *sendsize = mesg_size; - bgrm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack bulk get return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - outbuf = *sendbuf; - // pack the message first with the structure and then followed by key and data values (plus lengths). - return_code = MPI_Pack(bgrm, sizeof(struct mdhim_bgetrm_t), MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - - // For the each of the keys and data pack the chars plus two ints for key_len and data_len. 
- for (i=0; i < bgrm->num_keys; i++) { - return_code += MPI_Pack(&bgrm->key_lens[i], 1, MPI_INT, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - if (bgrm->key_lens[i] > 0) { - return_code += MPI_Pack(bgrm->keys[i], bgrm->key_lens[i], MPI_CHAR, outbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - } - - return_code += MPI_Pack(&bgrm->value_lens[i], 1, MPI_INT, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - /* Pack the value retrieved from the db - There is a chance that the key didn't exist in the db */ - if (bgrm->value_lens[i] > 0) { - return_code += MPI_Pack(bgrm->values[i], bgrm->value_lens[i], - MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - } - } - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the bulk get return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_bgetrm_message - * Unpacks a bulk get return message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message - * @param mesg_size in size of the incoming message - * @param bgetrm out bulk get return message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bgetrm_t { - int mtype; - int error; - void **keys; - int *key_lens; - void **values; - int *value_lens; - int num_records; - }; -*/ -int unpack_bgetrm_message(struct mdhim_t *md, void *message, int mesg_size, void **bgetrm) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - int i; - struct mdhim_bgetrm_t *bgrm; - - if ((*((struct mdhim_bgetrm_t **) bgetrm) = malloc(sizeof(struct mdhim_bgetrm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget return message.", md->mdhim_rank); - 
return MDHIM_ERROR; - } - - bgrm = *((struct mdhim_bgetrm_t **) bgetrm); - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, bgrm, sizeof(struct mdhim_bgetrm_t), - MPI_CHAR, md->mdhim_comm); - - // Allocate memory for key pointers, to be populated later. - if ((bgrm->keys = malloc(bgrm->num_keys * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bgetrm message.", md->mdhim_rank); - return MDHIM_ERROR; - } - memset(bgrm->keys, 0, sizeof(void *) * bgrm->num_keys); - - // Allocate memory for key_lens, to be populated later. - if ((bgrm->key_lens = malloc(bgrm->num_keys * sizeof(int))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - memset(bgrm->key_lens, 0, sizeof(int) * bgrm->num_keys); - - // Allocate memory for value pointers, to be populated later. - if ((bgrm->values = malloc(bgrm->num_keys * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bgetrm message.", md->mdhim_rank); - return MDHIM_ERROR; - } - memset(bgrm->values, 0, sizeof(void *) * bgrm->num_keys); - - // Allocate memory for value_lens, to be populated later. - if ((bgrm->value_lens = (int *)malloc(bgrm->num_keys * sizeof(int))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - memset(bgrm->value_lens, 0, sizeof(int) * bgrm->num_keys); - - // For the each of the keys and data unpack the chars plus two ints for key_lens[i] and data_lens[i]. 
- for (i=0; i < bgrm->num_keys; i++) { - // Unpack the key_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, &bgrm->key_lens[i], 1, - MPI_INT, md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - bgrm->keys[i] = NULL; - if (bgrm->key_lens[i] && - (bgrm->keys[i] = (char *)malloc(bgrm->key_lens[i] * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - if (bgrm->keys[i]) { - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, bgrm->keys[i], bgrm->key_lens[i], - MPI_CHAR, md->mdhim_comm); - } - - // Unpack the value_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, &bgrm->value_lens[i], 1, - MPI_INT, md->mdhim_comm); - - //There wasn't a value found for this key - if (!bgrm->value_lens[i]) { - bgrm->values[i] = NULL; - continue; - } - - // Unpack data by first allocating memory and then extracting the values from message - bgrm->values[i] = NULL; - if (bgrm->value_lens[i] && - (bgrm->values[i] = (char *)malloc(bgrm->value_lens[i] * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bget return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - if (bgrm->values[i]) { - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, bgrm->values[i], - bgrm->value_lens[i], - MPI_CHAR, md->mdhim_comm); - } - - } - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the bget return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -///------------------------ - -/** - * pack_base_message - * Packs a base message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param cm 
in structure base message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for packed message size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_basem_t { - int mtype; - }; -*/ -int pack_base_message(struct mdhim_t *md, struct mdhim_basem_t *cm, void **sendbuf, int *sendsize) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_basem_t); // Generous variable for size calculation - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; // Variable for incremental pack - void *outbuf; - - mesg_size = m_size; - *sendsize = mesg_size; - cm->size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - outbuf = *sendbuf; - // pack the message first with the structure and then followed by key and data values. 
- return_code = MPI_Pack(cm, sizeof(struct mdhim_basem_t), MPI_CHAR, outbuf, mesg_size, - &mesg_idx, md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -///------------------------ - -/** - * pack_del_message - * Packs a delete message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param dm in structure del_message which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for packed message size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_delm_t { - int mtype; - void *key; - int key_len; - int server_rank; - }; -*/ -int pack_del_message(struct mdhim_t *md, struct mdhim_delm_t *dm, void **sendbuf, int *sendsize) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_delm_t); // Generous variable for size calculation - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; // Variable for incremental pack - - // Add to size the length of the key and data fields - m_size += dm->key_len; - - // Is the computed message size of a safe value? (less than a max message size?) - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: del message too large." 
- " Del is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; - *sendsize = mesg_size; - dm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // pack the message first with the structure and then followed by key and data values. - return_code = MPI_Pack(dm, sizeof(struct mdhim_delm_t), MPI_CHAR, *sendbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(dm->key, dm->key_len, MPI_CHAR, *sendbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * pack_bdel_message - * Packs a bdel message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param bdm in structure bdel_message which will be packed into the message - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for packed message size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bdelm_t { - int mtype; - void **keys; - int *key_lens; - int num_records; - int server_rank; - }; -*/ -int pack_bdel_message(struct mdhim_t *md, struct mdhim_bdelm_t *bdm, void **sendbuf, - int *sendsize) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int64_t m_size = sizeof(struct mdhim_bdelm_t); // Generous variable for size calc - int mesg_size; // Variable to be used as parameter for MPI_pack of safe size - int mesg_idx = 0; - int i; - - // Add up the size of message - m_size += bdm->num_keys * sizeof(int); - - // For the each of the keys add enough chars. 
- for (i=0; i < bdm->num_keys; i++) - m_size += bdm->key_lens[i]; - - // Is the computed message size of a safe value? (less than a max message size?) - if (m_size > MDHIM_MAX_MSG_SIZE) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: bulk del message too large." - " Bdel is over Maximum size allowed of %d.", md->mdhim_rank, MDHIM_MAX_MSG_SIZE); - return MDHIM_ERROR; - } - mesg_size = m_size; // Safe size to use in MPI_pack - *sendsize = mesg_size; - bdm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to pack bulk del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // pack the message first with the structure and then followed by key (plus lengths). - return_code = MPI_Pack(bdm, sizeof(struct mdhim_bdelm_t), MPI_CHAR, *sendbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - - // For the each of the keys and data pack the chars plus one int for key_len. - for (i=0; i < bdm->num_keys; i++) { - return_code += MPI_Pack(&bdm->key_lens[i], 1, MPI_INT, *sendbuf, - mesg_size, &mesg_idx, md->mdhim_comm); - return_code += MPI_Pack(bdm->keys[i], bdm->key_lens[i], MPI_CHAR, - *sendbuf, mesg_size, &mesg_idx, - md->mdhim_comm); - } - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the bulk del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_del_message - * Unpacks a del message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message - * @param mesg_size in size of the incoming message - * @param delm out structure get_message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_delm_t { - int mtype; - void *key; - int 
key_len; - int server_rank; - }; -*/ -int unpack_del_message(struct mdhim_t *md, void *message, int mesg_size, void **delm) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - struct mdhim_delm_t *dm; - - if ((*((struct mdhim_delm_t **) delm) = malloc(sizeof(struct mdhim_delm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - dm = *((struct mdhim_delm_t **) delm); - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, dm, sizeof(struct mdhim_delm_t), - MPI_CHAR, md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - if ((dm->key = (char *)malloc(dm->key_len * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, dm->key, dm->key_len, - MPI_CHAR, md->mdhim_comm); - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the del message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_bdel_message - * Unpacks a bulk del message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message in pointer for packed message - * @param mesg_size in size of the incoming message - * @param bdelm out structure bulk_del_message which will be unpacked from the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_bdelm_t { - int mtype; - void **keys; - int *key_lens; - int num_records; - int server_rank; - }; -*/ -int 
unpack_bdel_message(struct mdhim_t *md, void *message, int mesg_size, void **bdelm) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_idx = 0; // Variable for incremental unpack - int i; - int num_records; - - if ((*((struct mdhim_bdelm_t **) bdelm) = malloc(sizeof(struct mdhim_bdelm_t))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bdel message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // Unpack the message first with the structure and then followed by key and data values. - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_bdelm_t **) bdelm)), sizeof(struct mdhim_bdelm_t), - MPI_CHAR, md->mdhim_comm); - - num_records = (*((struct mdhim_bdelm_t **) bdelm))->num_keys; - // Allocate memory for keys, to be populated later. - if (((*((struct mdhim_bdelm_t **) bdelm))->keys = - malloc(num_records * sizeof(void *))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bdel message.", md->mdhim_rank); - return MDHIM_ERROR; - } - // Allocate memory for key_lens, to be populated later. - if (((*((struct mdhim_bdelm_t **) bdelm))->key_lens = - (int *)malloc(num_records * sizeof(int))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bdel message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - // For the each of the keys and data unpack the chars plus an int for key_lens[i]. 
- for (i=0; i < num_records; i++) { - // Unpack the key_lens[i] - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - &(*((struct mdhim_bdelm_t **) bdelm))->key_lens[i], 1, - MPI_INT, md->mdhim_comm); - - // Unpack key by first allocating memory and then extracting the values from message - if (((*((struct mdhim_bdelm_t **) bdelm))->keys[i] = - (char *)malloc((*((struct mdhim_bdelm_t **) bdelm))->key_lens[i] * - sizeof(char))) == NULL) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack bdel message.", md->mdhim_rank); - return MDHIM_ERROR; - } - return_code += MPI_Unpack(message, mesg_size, &mesg_idx, - (*((struct mdhim_bdelm_t **) bdelm))->keys[i], - (*((struct mdhim_bdelm_t **) bdelm))->key_lens[i], - MPI_CHAR, md->mdhim_comm); - } - - // If the unpack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the bdel message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -///------------------------ - - -/** - * pack_return_message - * Packs a return message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param rm in structure which will be packed into the sendbuf - * @param sendbuf out double pointer for packed message to send - * @param sendsize out pointer for packed message size - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_rm_t { - int mtype; - int error; - }; -*/ -int pack_return_message(struct mdhim_t *md, struct mdhim_rm_t *rm, void **sendbuf, int *sendsize) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_size = sizeof(struct mdhim_rm_t); - int mesg_idx = 0; - void *outbuf; - - *sendsize = mesg_size; - rm->basem.size = mesg_size; - - if ((*sendbuf = malloc(mesg_size * sizeof(char))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - 
"memory to pack return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - outbuf = *sendbuf; - // Pack the message from the structure - return_code = MPI_Pack(rm, sizeof(struct mdhim_rm_t), MPI_CHAR, outbuf, mesg_size, &mesg_idx, - md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to pack " - "the return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * unpack_return_message - * unpacks a return message structure into contiguous memory for message passing - * - * @param md in main MDHIM struct - * @param message out pointer for buffer to unpack message to - * @param retm in return message that will be unpacked into message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - * - * struct mdhim_rm_t { - int mtype; - int error; - }; -*/ -int unpack_return_message(struct mdhim_t *md, void *message, void **retm) { - - int return_code = MPI_SUCCESS; // MPI_SUCCESS = 0 - int mesg_size = sizeof(struct mdhim_rm_t); - int mesg_idx = 0; - struct mdhim_rm_t *rm; - - if (((*(struct mdhim_rm_t **) retm) = malloc(sizeof(struct mdhim_rm_t))) == NULL) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to allocate " - "memory to unpack return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - rm = *((struct mdhim_rm_t **) retm); - - // Unpack the structure from the message - return_code = MPI_Unpack(message, mesg_size, &mesg_idx, rm, sizeof(struct mdhim_rm_t), - MPI_CHAR, md->mdhim_comm); - - // If the pack did not succeed then log the error and return the error code - if ( return_code != MPI_SUCCESS ) { - mlog(MDHIM_CLIENT_CRIT, "MDHIM Rank: %d - Error: unable to unpack " - "the return message.", md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -/** - * Frees all memory taken up by messages - including keys and values - * - * @param msg pointer to the 
message to free - */ -void mdhim_full_release_msg(void *msg) { - int mtype; - int i; - - if (!msg) { - return; - } - - //Determine the message type and free accordingly - mtype = ((struct mdhim_basem_t *) msg)->mtype; - switch(mtype) { - case MDHIM_RECV: - free((struct mdhim_rm_t *) msg); - break; - case MDHIM_RECV_BULK_GET: - for (i = 0; i < ((struct mdhim_bgetrm_t *) msg)->num_keys; i++) { - if (((struct mdhim_bgetrm_t *) msg)->key_lens[i] && - ((struct mdhim_bgetrm_t *) msg)->keys[i]) { - free(((struct mdhim_bgetrm_t *) msg)->keys[i]); - } - if (((struct mdhim_bgetrm_t *) msg)->value_lens[i] && - ((struct mdhim_bgetrm_t *) msg)->values[i]) { - free(((struct mdhim_bgetrm_t *) msg)->values[i]); - } - } - - if (((struct mdhim_bgetrm_t *) msg)->key_lens) { - free(((struct mdhim_bgetrm_t *) msg)->key_lens); - } - if (((struct mdhim_bgetrm_t *) msg)->keys) { - free(((struct mdhim_bgetrm_t *) msg)->keys); - } - if (((struct mdhim_bgetrm_t *) msg)->value_lens) { - free(((struct mdhim_bgetrm_t *) msg)->value_lens); - } - if (((struct mdhim_bgetrm_t *) msg)->values) { - free(((struct mdhim_bgetrm_t *) msg)->values); - } - - free((struct mdhim_bgetrm_t *) msg); - break; - case MDHIM_BULK_PUT: - for (i = 0; i < ((struct mdhim_bputm_t *) msg)->num_keys; i++) { - if (((struct mdhim_bputm_t *) msg)->key_lens[i] && - ((struct mdhim_bputm_t *) msg)->keys[i]) { - free(((struct mdhim_bputm_t *) msg)->keys[i]); - } - if (((struct mdhim_bputm_t *) msg)->value_lens[i] && - ((struct mdhim_bputm_t *) msg)->values[i]) { - free(((struct mdhim_bputm_t *) msg)->values[i]); - } - } - - if (((struct mdhim_bputm_t *) msg)->key_lens) { - free(((struct mdhim_bputm_t *) msg)->key_lens); - } - if (((struct mdhim_bputm_t *) msg)->keys) { - free(((struct mdhim_bputm_t *) msg)->keys); - } - if (((struct mdhim_bputm_t *) msg)->value_lens) { - free(((struct mdhim_bputm_t *) msg)->value_lens); - } - if (((struct mdhim_bputm_t *) msg)->values) { - free(((struct mdhim_bputm_t *) msg)->values); - } - - 
free((struct mdhim_bputm_t *) msg); - break; - default: - break; - } -} - - -/** - * Frees memory taken up by messages except for the keys and values - * - * @param msg pointer to the message to free - */ -void mdhim_partial_release_msg(void *msg) { - int mtype; - - if (!msg) { - return; - } - - //Determine the message type and free accordingly - mtype = ((struct mdhim_basem_t *) msg)->mtype; - switch(mtype) { - case MDHIM_RECV: - free((struct mdhim_rm_t *) msg); - break; - case MDHIM_RECV_BULK_GET: - if (((struct mdhim_bgetrm_t *) msg)->key_lens) { - free(((struct mdhim_bgetrm_t *) msg)->key_lens); - } - if (((struct mdhim_bgetrm_t *) msg)->keys) { - free(((struct mdhim_bgetrm_t *) msg)->keys); - } - if (((struct mdhim_bgetrm_t *) msg)->value_lens) { - free(((struct mdhim_bgetrm_t *) msg)->value_lens); - } - if (((struct mdhim_bgetrm_t *) msg)->values) { - free(((struct mdhim_bgetrm_t *) msg)->values); - } - - free((struct mdhim_bgetrm_t *) msg); - break; - case MDHIM_BULK_PUT: - if (((struct mdhim_bputm_t *) msg)->key_lens) { - free(((struct mdhim_bputm_t *) msg)->key_lens); - } - if (((struct mdhim_bputm_t *) msg)->keys) { - free(((struct mdhim_bputm_t *) msg)->keys); - } - if (((struct mdhim_bputm_t *) msg)->value_lens) { - free(((struct mdhim_bputm_t *) msg)->value_lens); - } - if (((struct mdhim_bputm_t *) msg)->values) { - free(((struct mdhim_bputm_t *) msg)->values); - } - - free((struct mdhim_bputm_t *) msg); - break; - default: - break; - } -} diff --git a/meta/src/messages.h b/meta/src/messages.h deleted file mode 100644 index 9419b9eb0..000000000 --- a/meta/src/messages.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. 
- * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - - -#ifndef __MESSAGES_H -#define __MESSAGES_H - -#ifdef __cplusplus -extern "C" -{ -#endif -#include "range_server.h" - -/* Message Types */ - -//Put a single key in the data store -#define MDHIM_PUT 1 -//Put multiple keys in the data store at one time -#define MDHIM_BULK_PUT 2 -//Get multiple keys from the data store at one time -#define MDHIM_BULK_GET 3 -//Delete a single key from the data store -#define MDHIM_DEL 4 -//Delete multiple keys from the data store at once -#define MDHIM_BULK_DEL 5 -//Close message -#define MDHIM_CLOSE 6 -//Generic receive message -#define MDHIM_RECV 7 -//Receive message for a get request -#define MDHIM_RECV_GET 8 -//Receive message for a bulk get request -#define MDHIM_RECV_BULK_GET 9 -//Commit message -#define MDHIM_COMMIT 10 - -/* Operations for getting a key/value */ -//Get the value for the specified key -#define MDHIM_GET_EQ 0 -//Get the next key and value -#define MDHIM_GET_NEXT 1 -//Get the previous key and value -#define MDHIM_GET_PREV 2 -//Get the first key and value -#define MDHIM_GET_FIRST 3 -//Get the last key and value -#define MDHIM_GET_LAST 4 -/* Use these operation types for retrieving the primary key - from a secondary index and key. 
*/ -//Gets the primary key's value from a secondary key -#define MDHIM_GET_PRIMARY_EQ 5 -#define MDHIM_RANGE_BGET 6 - -//Message Types -#define RANGESRV_WORK_MSG 1 -#define RANGESRV_WORK_SIZE_MSG 2 -#define RANGESRV_INFO 3 -#define CLIENT_RESPONSE_MSG 4 -#define CLIENT_RESPONSE_SIZE_MSG 5 - -//#define MAX_BULK_OPS 1000000 -#define MAX_BULK_OPS 20000000 - -//Maximum size of messages allowed -#define MDHIM_MAX_MSG_SIZE 2147483647 -struct mdhim_t; - -/* Base message */ -struct mdhim_basem_t { - //Message type - int mtype; - int server_rank; - int size; - int index; - int index_type; - char *index_name; -}; -typedef struct mdhim_basem_t mdhim_basem_t; - -/* Put message */ -struct mdhim_putm_t { - mdhim_basem_t basem; - void *key; - int key_len; - void *value; - int value_len; -}; - -/* Bulk put message */ -struct mdhim_bputm_t { - mdhim_basem_t basem; - void **keys; - int *key_lens; - void **values; - int *value_lens; - int num_keys; -}; - -/* Get record message */ -struct mdhim_getm_t { - mdhim_basem_t basem; - //Operation type e.g., MDHIM_GET_EQ, MDHIM_GET_NEXT, MDHIM_GET_PREV - int op; - /* The key to get if op is MDHIM_GET_EQ - If op is MDHIM_GET_NEXT or MDHIM_GET_PREV the key is the last key to start from - */ - void *key; - //The length of the key - int key_len; - int num_keys; -}; - -/* Bulk get record message */ -struct mdhim_bgetm_t { - mdhim_basem_t basem; - //Operation type i.e, MDHIM_GET_EQ, MDHIM_GET_NEXT, MDHIM_GET_PREV - int op; - void **keys; - int *key_lens; - int num_keys; - - //Number of records to retrieve per key given - int num_recs; -}; - -/* Delete message */ -struct mdhim_delm_t { - mdhim_basem_t basem; - void *key; - int key_len; -}; - -/* Bulk delete record message */ -struct mdhim_bdelm_t { - mdhim_basem_t basem; - void **keys; - int *key_lens; - int num_keys; -}; - -/* Range server info message */ -struct mdhim_rsi_t { - //The range server number, which is a number 1 - N where N is the number of servers - uint32_t rangesrv_num; -}; - -/* 
Generic receive message */ -struct mdhim_rm_t { - mdhim_basem_t basem; - int error; -}; - -/* Bulk get receive message */ -struct mdhim_bgetrm_t { - mdhim_basem_t basem; - int error; - void **keys; - int *key_lens; - void **values; - int *value_lens; - int num_keys; - struct mdhim_bgetrm_t *next; -}; - -/* Bulk generic receive message */ -struct mdhim_brm_t { - mdhim_basem_t basem; - int error; - struct mdhim_brm_t *next; -}; - - -int send_rangesrv_work(struct mdhim_t *md, int dest, void *message); -int send_all_rangesrv_work(struct mdhim_t *md, void **messages, int num_srvs); -int receive_rangesrv_work(struct mdhim_t *md, int *src, void **message); -int send_client_response(struct mdhim_t *md, int dest, void *message, int *sizebuf, - void **sendbuf, MPI_Request **size_req, MPI_Request **msg_req); -int receive_client_response(struct mdhim_t *md, int src, void **message); -int receive_all_client_responses(struct mdhim_t *md, int *srcs, int nsrcs, - void ***messages); -int pack_put_message(struct mdhim_t *md, struct mdhim_putm_t *pm, void **sendbuf, int *sendsize); -int pack_bput_message(struct mdhim_t *md, struct mdhim_bputm_t *bpm, void **sendbuf, int *sendsize); -int unpack_put_message(struct mdhim_t *md, void *message, int mesg_size, void **pm); -int unpack_bput_message(struct mdhim_t *md, void *message, int mesg_size, void **bpm); - -int pack_get_message(struct mdhim_t *md, struct mdhim_getm_t *gm, void **sendbuf, int *sendsize); -int pack_bget_message(struct mdhim_t *md, struct mdhim_bgetm_t *bgm, void **sendbuf, int *sendsize); -int unpack_get_message(struct mdhim_t *md, void *message, int mesg_size, void **gm); -int unpack_bget_message(struct mdhim_t *md, void *message, int mesg_size, void **bgm); - -int pack_bgetrm_message(struct mdhim_t *md, struct mdhim_bgetrm_t *bgrm, void **sendbuf, int *sendsize); -int unpack_bgetrm_message(struct mdhim_t *md, void *message, int mesg_size, void **bgrm); - -int pack_del_message(struct mdhim_t *md, struct mdhim_delm_t 
*dm, void **sendbuf, int *sendsize); -int pack_bdel_message(struct mdhim_t *md, struct mdhim_bdelm_t *bdm, void **sendbuf, int *sendsize); -int unpack_del_message(struct mdhim_t *md, void *message, int mesg_size, void **dm); -int unpack_bdel_message(struct mdhim_t *md, void *message, int mesg_size, void **bdm); - -int pack_return_message(struct mdhim_t *md, struct mdhim_rm_t *rm, void **sendbuf, int *sendsize); -int unpack_return_message(struct mdhim_t *md, void *message, void **rm); - -int pack_base_message(struct mdhim_t *md, struct mdhim_basem_t *cm, void **sendbuf, int *sendsize); - -void mdhim_full_release_msg(void *message); -void mdhim_partial_release_msg(void *message); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/meta/src/partitioner.c b/meta/src/partitioner.c deleted file mode 100644 index 6f6013b80..000000000 --- a/meta/src/partitioner.c +++ /dev/null @@ -1,1035 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. 
- */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include "partitioner.h" - -#include "unifyfs_metadata_mdhim.h" - -struct timeval calslicestart, calsliceend; -double calslicetime = 0; -struct timeval rangehashstart, rangehashend; -double rangehashtime = 0; -struct timeval serhashstart, serhashend; -double serhashtime = 0; -unsigned long meta_pair[2] = {0}; -//Global hashtable for alphabet used in partitioner algorithm -struct mdhim_char *mdhim_alphabet = NULL; - -/** - * delete_alphabet - * Deletes the alphabet hash table - */ -void delete_alphabet() { - struct mdhim_char *cur_char, *tmp; - HASH_ITER(hh, mdhim_alphabet, cur_char, tmp) { - HASH_DEL(mdhim_alphabet, cur_char); /*delete it (mdhim_alphabet advances to next)*/ - free(cur_char); /* free it */ - } - - mdhim_alphabet = NULL; -} - -long double get_str_num(void *key, uint32_t key_len) { - int id, i; - struct mdhim_char *mc; - long double str_num; - - str_num = 0; - //Iterate through each character to perform the algorithm mentioned above - for (i = 0; i < key_len; i++) { - //Ignore null terminating char - if (i == key_len - 1 && ((char *)key)[i] == '\0') { - break; - } - - id = (int) ((char *)key)[i]; - HASH_FIND_INT(mdhim_alphabet, &id, mc); - str_num += mc->pos * powl(2, MDHIM_ALPHABET_EXPONENT * -(i + 1)); - } - - return str_num; -} - -/* Allocate a copy of a key and return it. The returned key must be freed. 
*/ -void* copy_unifyfs_key(void* key, uint32_t key_len) -{ - void* key_copy = malloc((size_t)key_len); - memcpy(key_copy, key, (size_t)key_len); - return key_copy; -} - -uint64_t get_byte_num(void *key, uint32_t key_len) { - uint64_t byte_num; - - byte_num = *((long *)(((char *)key)+sizeof(long))); - return byte_num; -} - -void partitioner_init() { - // Create the alphabet for string keys - build_alphabet(); -} - -/* - * partitioner_release - * Releases memory in use by the partitioner - * - */ -void partitioner_release() { - delete_alphabet(); - mdhim_alphabet = NULL; -} - -/** - * add_char - * Adds a character to our alphabet hash table - * - * @param id The id of our entry (the ascii code of the character) - * @param pos The value of our entry (the position of the character in our alphabet) - */ -void add_char(int id, int pos) { - struct mdhim_char *mc; - - //Create a new mdhim_char to hold our entry - mc = malloc(sizeof(struct mdhim_char)); - - //Set the mdhim_char - mc->id = id; - mc->pos = pos; - - //Add it to the hash table - HASH_ADD_INT(mdhim_alphabet, id, mc); - - return; -} - -/** - * build_alphabet - * Creates our ascii based alphabet and inserts each character into a uthash table - */ -void build_alphabet() { - char c; - int i, indx; - - /* Index of the character in the our alphabet - This is to number each character we care about so we can map - a string to a range server - - 0 - 9 have indexes 0 - 9 - A - Z have indexes 10 - 35 - a - z have indexes 36 - 61 - */ - indx = 0; - - //Start with numbers 0 - 9 - c = '0'; - for (i = (int) c; i <= (int) '9'; i++) { - add_char(i, indx); - indx++; - } - - //Next deal with A-Z - c = 'A'; - for (i = (int) c; i <= (int) 'Z'; i++) { - add_char(i, indx); - indx++; - } - - //Next deal with a-z - c = 'a'; - for (i = (int) c; i <= (int) 'z'; i++) { - add_char(i, indx); - indx++; - } - - return; -} - -void _add_to_rangesrv_list(rangesrv_list **list, rangesrv_info *ri) { - rangesrv_list *list_p, *entry; - - entry = 
malloc(sizeof(rangesrv_list)); - entry->ri = ri; - entry->next = NULL; - if (!*list) { - *list = entry; - } else { - list_p = *list; - if (list_p->ri == ri) return; - while (list_p->next) { - list_p = list_p->next; - if (list_p->ri == ri) return; - } - - list_p->next = entry; - } - - return; -} - -/** - * verify_key - * Determines whether the given key is a valid key or not - * - * @param key the key to check - * @param key_len the length of the key - * @param key_type the type of the key - * - * @return MDHIM_ERROR if the key is not valid, otherwise the MDHIM_SUCCESS - */ -int verify_key(struct index_t *index, void *key, - int key_len, int key_type) { - int i; - int id; - struct mdhim_char *mc; - uint64_t ikey = 0; - uint64_t size_check; - - if (!key) { - return MDHIM_ERROR; - } - - if (key_len > MAX_KEY_LEN) { - return MDHIM_ERROR; - } - if (key_type == MDHIM_STRING_KEY) { - for (i = 0; i < key_len; i++) { - //Ignore null terminating char - if (i == key_len - 1 && ((char *)key)[i] == '\0') { - break; - } - - id = (int) ((char *)key)[i]; - HASH_FIND_INT(mdhim_alphabet, &id, mc); - if (!mc) { - return MDHIM_ERROR; - } - } - } - - if (key_type == MDHIM_INT_KEY) { - ikey = *(uint32_t *)key; - } else if (key_type == MDHIM_LONG_INT_KEY) { - ikey = *(uint64_t *)key; - } else if (key_type == MDHIM_FLOAT_KEY) { - ikey = *(float *)key; - } else if (key_type == MDHIM_DOUBLE_KEY) { - ikey = *(double *)key; - } - - size_check = ikey/index->mdhim_max_recs_per_slice; - if (size_check >= MDHIM_MAX_SLICES) { - mlog(MDHIM_CLIENT_CRIT, "Error - Not enough slices for this key." 
- " Try increasing the slice size."); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} - -int is_float_key(int type) { - int ret = 0; - - if (type == MDHIM_STRING_KEY) { - ret = 1; - } else if (type == MDHIM_FLOAT_KEY) { - ret = 1; - } else if (type == MDHIM_DOUBLE_KEY) { - ret = 1; - } else if (type == MDHIM_INT_KEY) { - ret = 0; - } else if (type == MDHIM_LONG_INT_KEY) { - ret = 0; - } else if (type == MDHIM_BYTE_KEY) { - ret = 1; - } - - return ret; -} - -/** - * get_slice_num - * - * gets the slice number from a key - * slice is a portion of the range served by MDHIM - * each range server servers many slices of the range - * @param md main MDHIM struct - * @param key pointer to the key to find the range server of - * @param key_len length of the key - * @return the slice number or 0 on error - */ -int get_slice_num(struct mdhim_t *md, struct index_t *index, void *key, int key_len) { - //The number that maps a key to range server (dependent on key type) - // printf("getting slice num\n"); - fflush(stdout); - int slice_num; - uint64_t key_num; - //The range server number that we return - float fkey; - double dkey; - int ret; - long double map_num; - uint64_t total_keys; - int key_type = index->key_type; - //The last key number that can be represented by the number of slices and the slice size - total_keys = MDHIM_MAX_SLICES * index->mdhim_max_recs_per_slice; - - //Make sure this key is valid - if ((ret = verify_key(index, key, key_len, key_type)) != MDHIM_SUCCESS) { - mlog(MDHIM_CLIENT_INFO, "Rank: %d - Invalid key given", - md->mdhim_rank); - return MDHIM_ERROR; - } - - //Perform key dependent algorithm to get the key in terms of the ranges served - switch(key_type) { - case MDHIM_INT_KEY: - key_num = *(uint32_t *) key; - - break; - case MDHIM_LONG_INT_KEY: - key_num = *(uint64_t *) key; - - break; - case MDHIM_BYTE_KEY: - /* Algorithm used - 1. Iterate through each byte - 2. Transform each byte into a floating point number - 3. 
Add this floating point number to map_num - 4. Multiply this number times the total number of keys to get the number - that represents the position in a range - - For #2, the transformation is as follows: - - Take the position of the character in the mdhim alphabet - times 2 raised to 8 * -(i + 1) - where i is the current iteration in the loop - */ - - //Used for calculating the range server to use for this string - // map_num = 0; - // map_num = get_byte_num(key, key_len); - // key_num = floorl(map_num * total_keys); - key_num = get_byte_num(key, key_len); -// printf("key_num is: %ld\n", key_num); -// fflush(stdout); - break; - case MDHIM_FLOAT_KEY: - //Convert the key to a float - fkey = *((float *) key); - fkey = floor(fabsf(fkey)); - key_num = fkey; - - break; - case MDHIM_DOUBLE_KEY: - //Convert the key to a double - dkey = *((double *) key); - dkey = floor(fabs(dkey)); - key_num = dkey; - - break; - case MDHIM_STRING_KEY: - /* Algorithm used - - 1. Iterate through each character - 2. Transform each character into a floating point number - 3. Add this floating point number to map_num - 4. 
Multiply this number times the total number of keys to get the number - that represents the position in a range - - For #2, the transformation is as follows: - - Take the position of the character in the mdhim alphabet - times 2 raised to the MDHIM_ALPHABET_EXPONENT * -(i + 1) - where i is the current iteration in the loop - */ - - //Used for calculating the range server to use for this string - map_num = 0; - map_num = get_str_num(key, key_len); - key_num = floorl(map_num * total_keys); - - break; - case MDHIM_UNIFYFS_KEY: - /* Use only the gfid portion of the key, which ensures all extents - * for the same file hash to the same server */ - key_num = (uint64_t) UNIFYFS_KEY_FID(key); - break; - default: - return 0; - } - - - /* Convert the key to a slice number */ - slice_num = key_num/index->mdhim_max_recs_per_slice; - - //Return the slice number - return slice_num; -} - -/** - * get_range_server_by_slice - * - * gets the range server that handles the key given - * @param md main MDHIM struct - * @param slice the slice number - * @return the rank of the range server or NULL on error - */ -rangesrv_info *get_range_server_by_slice(struct mdhim_t *md, struct index_t *index, int slice) { - //The number that maps a key to range server (dependent on key type) - uint32_t rangesrv_num; - //The range server number that we return - rangesrv_info *ret_rp; - - if (index->num_rangesrvs == 1) { - rangesrv_num = 1; - } else { - rangesrv_num = slice % index->num_rangesrvs; - rangesrv_num++; - } - - //Find the range server number in the hash table - ret_rp = NULL; - HASH_FIND_INT(index->rangesrvs_by_num, &rangesrv_num, ret_rp); - - //Return the rank - return ret_rp; -} - -/** - * get_range_servers - * - * gets the range server that handles the key given - * @param md main MDHIM struct - * @param key pointer to the key to find the range server of - * @param key_len length of the key - * @return the rank of the range server or NULL on error - */ -rangesrv_list 
*get_range_servers(struct mdhim_t *md, struct index_t *index, - void *key, int key_len) { - //The number that maps a key to range server (dependent on key type) - int slice_num; - //The range server number that we return - rangesrv_info *ret_rp; - rangesrv_list *rl; - - if ((slice_num = get_slice_num(md, index, key, key_len)) == MDHIM_ERROR) { - return NULL; - } - - ret_rp = get_range_server_by_slice(md, index, slice_num); - rl = NULL; - _add_to_rangesrv_list(&rl, ret_rp); - - //Return the range server list - return rl; -} - -struct mdhim_stat *get_next_slice_stat(struct mdhim_t *md, struct index_t *index, - int slice_num) { - struct mdhim_stat *stat, *tmp, *next_slice; - - next_slice = NULL; - - //Iterate through the stat hash entries to find the slice - //number next after the given slice number - HASH_ITER(hh, index->stats, stat, tmp) { - if (!stat) { - continue; - } - - if (stat->key > slice_num && !next_slice) { - next_slice = stat; - } else if (next_slice && stat->key > slice_num && stat->key < next_slice->key) { - next_slice = stat; - } - } - - return next_slice; -} - -struct mdhim_stat *get_prev_slice_stat(struct mdhim_t *md, struct index_t *index, - int slice_num) { - struct mdhim_stat *stat, *tmp, *prev_slice; - - prev_slice = NULL; - - //Iterate through the stat hash entries to find the slice - //number next after the given slice number - HASH_ITER(hh, index->stats, stat, tmp) { - if (!stat) { - continue; - } - - if (stat->key < slice_num && !prev_slice) { - prev_slice = stat; - } else if (prev_slice && stat->key < slice_num && stat->key > prev_slice->key) { - prev_slice = stat; - } - } - - return prev_slice; -} - -struct mdhim_stat *get_last_slice_stat(struct mdhim_t *md, struct index_t *index) { - struct mdhim_stat *stat, *tmp, *last_slice; - - last_slice = NULL; - - //Iterate through the stat hash entries to find the slice - //number next after the given slice number - HASH_ITER(hh, index->stats, stat, tmp) { - if (!stat) { - continue; - } - - if 
(!last_slice) { - last_slice = stat; - } else if (stat->key > last_slice->key) { - last_slice = stat; - } - } - - return last_slice; -} - -struct mdhim_stat *get_first_slice_stat(struct mdhim_t *md, struct index_t *index) { - struct mdhim_stat *stat, *tmp, *first_slice; - - first_slice = NULL; - - //Iterate through the stat hash entries to find the slice - //number next after the given slice number - HASH_ITER(hh, index->stats, stat, tmp) { - if (!stat) { - continue; - } - - if (!first_slice) { - first_slice = stat; - } else if (stat->key < first_slice->key) { - first_slice = stat; - } - } - - return first_slice; -} - -int get_slice_from_fstat(struct mdhim_t *md, struct index_t *index, - int cur_slice, long double fstat, int op) { - int slice_num = 0; - struct mdhim_stat *cur_stat, *new_stat; - - if (!index->stats) { - return 0; - } - - //Get the stat struct for our current slice - HASH_FIND_INT(index->stats, &cur_slice, cur_stat); - - switch(op) { - case MDHIM_GET_NEXT: - slice_num = cur_slice; - break; - case MDHIM_GET_PREV: - if (cur_stat && *(long double *)cur_stat->min < fstat) { - slice_num = cur_slice; - goto done; - } else { - new_stat = get_prev_slice_stat(md, index, cur_slice); - goto new_stat; - } - - break; - case MDHIM_GET_FIRST: - new_stat = get_first_slice_stat(md, index); - goto new_stat; - break; - case MDHIM_GET_LAST: - new_stat = get_last_slice_stat(md, index); - goto new_stat; - break; - default: - slice_num = 0; - break; - } - -done: - return slice_num; - -new_stat: - if (new_stat) { - return new_stat->key; - } else { - return 0; - } -} - -int get_slice_from_istat(struct mdhim_t *md, struct index_t *index, - int cur_slice, uint64_t istat, int op) { - int slice_num = 0; - struct mdhim_stat *cur_stat, *new_stat; - - if (!index->stats) { - return 0; - } - - new_stat = cur_stat = NULL; - //Get the stat struct for our current slice - HASH_FIND_INT(index->stats, &cur_slice, cur_stat); - - switch(op) { - case MDHIM_GET_NEXT: - if (cur_stat && 
*(uint64_t *)cur_stat->max > istat && - *(uint64_t *)cur_stat->min <= istat) { - slice_num = cur_slice; - goto done; - } else { - new_stat = get_next_slice_stat(md, index, cur_slice); - goto new_stat; - } - - break; - case MDHIM_GET_PREV: - if (cur_stat && *(uint64_t *)cur_stat->min < istat && - *(uint64_t *)cur_stat->max >= istat ) { - slice_num = cur_slice; - goto done; - } else { - new_stat = get_prev_slice_stat(md, index, cur_slice); - goto new_stat; - } - - break; - case MDHIM_GET_FIRST: - new_stat = get_first_slice_stat(md, index); - goto new_stat; - break; - case MDHIM_GET_LAST: - new_stat = get_last_slice_stat(md, index); - goto new_stat; - break; - default: - slice_num = 0; - break; - } - -done: - return slice_num; - -new_stat: - if (new_stat) { - return new_stat->key; - } else { - return 0; - } -} - -/* Iterate through the multi-level hash table in index->stats to find the range servers - that could have the key */ -rangesrv_list *get_rangesrvs_from_istat(struct mdhim_t *md, struct index_t *index, - uint64_t istat, int op) { - struct mdhim_stat *cur_rank, *cur_stat, *tmp, *tmp2; - rangesrv_list *head, *lp, *entry; - int slice_num = 0; - unsigned int num_slices; - unsigned int i; - - if (!index->stats) { - return 0; - } - - cur_stat = cur_rank = NULL; - head = lp = entry = NULL; - HASH_ITER(hh, index->stats, cur_rank, tmp) { - num_slices = HASH_COUNT(cur_rank->stats); - i = 0; - HASH_ITER(hh, cur_rank->stats, cur_stat, tmp2) { - if (cur_stat->num <= 0) { - continue; - } - - slice_num = -1; - switch(op) { - case MDHIM_GET_NEXT: - if (cur_stat && *(uint64_t *)cur_stat->max > istat && - *(uint64_t *)cur_stat->min - 1 <= istat) { - slice_num = cur_stat->key; - } - - break; - case MDHIM_GET_PREV: - if (cur_stat && *(uint64_t *)cur_stat->min < istat && - *(uint64_t *)cur_stat->max + 1 >= istat ) { - slice_num = cur_stat->key; - } - - break; - case MDHIM_GET_FIRST: - if (!i) { - slice_num = cur_stat->key; - } - break; - case MDHIM_GET_LAST: - if (i == num_slices 
- 1) { - slice_num = cur_stat->key; - } - break; - case MDHIM_GET_EQ: - if (cur_stat && *(uint64_t *)cur_stat->max >= istat && - *(uint64_t *)cur_stat->min <= istat) { - slice_num = cur_stat->key; - } - - break; - default: - slice_num = 0; - break; - } - - if (slice_num < 0) { - continue; - } - - entry = malloc(sizeof(rangesrv_list)); - memset(entry, 0, sizeof(rangesrv_list)); - HASH_FIND_INT(index->rangesrvs_by_rank, &cur_rank->key, entry->ri); - if (!entry->ri) { - free(entry); - continue; - } - - if (!head) { - lp = head = entry; - } else { - lp->next = entry; - lp = lp->next; - } - - break; - } - } - - return head; -} - -/* Iterate through the multi-level hash table in index->stats to find the range servers - that could have the key */ -rangesrv_list *get_rangesrvs_from_fstat(struct mdhim_t *md, struct index_t *index, - long double fstat, int op) { - struct mdhim_stat *cur_rank, *cur_stat, *tmp, *tmp2; - rangesrv_list *head, *lp, *entry; - int slice_num = 0; - unsigned int num_slices; - unsigned int i; - - if (!index->stats) { - return 0; - } - - cur_stat = cur_rank = NULL; - head = lp = entry = NULL; - HASH_ITER(hh, index->stats, cur_rank, tmp) { - num_slices = HASH_COUNT(cur_rank->stats); - i = 0; - HASH_ITER(hh, cur_rank->stats, cur_stat, tmp2) { - if (cur_stat->num <= 0) { - continue; - } - - slice_num = -1; - switch(op) { - case MDHIM_GET_NEXT: - if (cur_stat && *(long double *)cur_stat->max > fstat && - *(long double *)cur_stat->min - 1.0L <= fstat) { - slice_num = cur_stat->key; - } - break; - case MDHIM_GET_PREV: - if (cur_stat && *(long double *)cur_stat->min < fstat && - *(long double *)cur_stat->max + 1.0L >= fstat ) { - slice_num = cur_stat->key; - } - - break; - case MDHIM_GET_FIRST: - if (!i) { - slice_num = cur_stat->key; - } - break; - case MDHIM_GET_LAST: - if (i == num_slices - 1) { - slice_num = cur_stat->key; - } - break; - case MDHIM_GET_EQ: - if (cur_stat && *(long double *)cur_stat->max >= fstat && - *(long double *)cur_stat->min <= 
fstat) { - slice_num = cur_stat->key; - } - - break; - default: - slice_num = 0; - break; - } - - if (slice_num < 0) { - continue; - } - - entry = malloc(sizeof(rangesrv_list)); - HASH_FIND_INT(index->rangesrvs_by_rank, &cur_rank->key, entry->ri); - if (!entry->ri) { - free(entry); - continue; - } - - if (!head) { - lp = head = entry; - } else { - lp->next = entry; - lp = lp->next; - } - - break; - } - } - - return head; -} - -/** - * get_range_server_from_stats - * - * gets the range server based on the stats acquired from a stat flush - * @param md main MDHIM struct - * @param key pointer to the key to find the range server of - * @param key_len length of the key - * @param op operation type ( - * @return the rank of the range server or NULL on error - */ -rangesrv_list *get_range_servers_from_stats(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, int op) { - //The number that maps a key to range server (dependent on key type) -// printf("get range servers from stats"); -// fflush(stdout); - int slice_num, cur_slice; - //The range server number that we return - rangesrv_info *ret_rp; - rangesrv_list *rl; - int float_type = 0; - long double fstat = 0; - uint64_t istat = 0; - - if (key && key_len) { - //Find the slice based on the operation and key value - if (index->key_type == MDHIM_STRING_KEY) { - fstat = get_str_num(key, key_len); - } else if (index->key_type == MDHIM_FLOAT_KEY) { - fstat = *(float *) key; - } else if (index->key_type == MDHIM_DOUBLE_KEY) { - fstat = *(double *) key; - } else if (index->key_type == MDHIM_INT_KEY) { - istat = *(uint32_t *) key; - } else if (index->key_type == MDHIM_LONG_INT_KEY) { - istat = *(uint64_t *) key; - } else if (index->key_type == MDHIM_BYTE_KEY) { - fstat = get_byte_num(key, key_len); - } - } - - //If we don't have any stats info, then return null - if (!index->stats) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - No statistics data available." 
- " Perform a mdhimStatFlush first.", - md->mdhim_rank); - return NULL; - } - - if (index->type != LOCAL_INDEX) { - - cur_slice = slice_num = 0; - float_type = is_float_key(index->key_type); - - //Get the current slice number of our key - if (key && key_len) { - cur_slice = get_slice_num(md, index, key, key_len); - - if (cur_slice == MDHIM_ERROR) { - - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error: could not determine a" - " valid a slice number", - md->mdhim_rank); - return NULL; - } - } else if (op != MDHIM_GET_FIRST && op != MDHIM_GET_LAST) { - //If the op is not first or last, then we expect a key - - return NULL; - } - - if (float_type) { - slice_num = get_slice_from_fstat(md, index, cur_slice, fstat, op); - } else { - slice_num = get_slice_from_istat(md, index, cur_slice, istat, op); - } - - if (slice_num == MDHIM_ERROR) { - return NULL; - } - - ret_rp = get_range_server_by_slice(md, index, slice_num); - if (!ret_rp) { - mlog(MDHIM_CLIENT_INFO, "Rank: %d - Did not get a valid range server from" - " get_range_server_by_size", - md->mdhim_rank); - return NULL; - } - - rl = NULL; - _add_to_rangesrv_list(&rl, ret_rp); - } else { - if (float_type) { - rl = get_rangesrvs_from_fstat(md, index, fstat, op); - } else { - rl = get_rangesrvs_from_istat(md, index, istat, op); - } - } - - //Return the range server information - return rl; -} - -rangesrv_list *get_range_servers_from_range(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len) { - //The number that maps a key to range server (dependent on key type) - - int start_slice, end_slice; - //The range server number that we return - rangesrv_info *ret_rp; - rangesrv_list *rl; - - //If we don't have any stats info, then return null - if (!index->stats) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - No statistics data available." 
- " Perform a mdhimStatFlush first.", - md->mdhim_rank); - return NULL; - } - - gettimeofday(&calslicestart, NULL); - start_slice = get_slice_num(md, index, start_key, key_len); - if (start_slice == MDHIM_ERROR) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error: could not determine a" - " valid a slice number", - md->mdhim_rank); - return NULL; - } - - end_slice = get_slice_num(md, index, end_key, key_len); - if (end_slice == MDHIM_ERROR) { - mlog(MDHIM_CLIENT_CRIT, "Rank: %d - Error: could not determine a valid a slice number", md->mdhim_rank); - return NULL; - } - gettimeofday(&calsliceend, NULL); - calslicetime+=1000000*(calsliceend.tv_sec-calslicestart.tv_sec)+calsliceend.tv_usec-calslicestart.tv_usec; - - long i; - rl = NULL; - - for (i = start_slice; i <= end_slice; i++) { - struct mdhim_stat *cur_stat; - gettimeofday(&rangehashstart, NULL); - HASH_FIND_INT(index->stats, &i, cur_stat); - gettimeofday(&rangehashend, NULL); - rangehashtime+=1000000*(rangehashend.tv_sec-rangehashstart.tv_sec)+rangehashend.tv_usec-rangehashstart.tv_usec; - gettimeofday(&serhashstart, NULL); - ret_rp = get_range_server_by_slice(md, index, i); - gettimeofday(&serhashend, NULL); - serhashtime+=1000000*(serhashend.tv_sec-serhashstart.tv_sec)+serhashend.tv_usec-serhashstart.tv_usec; - - if (!ret_rp) { - mlog(MDHIM_CLIENT_INFO, "Rank: %d - Did not get a valid range server from" - " get_range_server_by_size", - md->mdhim_rank); - return NULL; - } - ret_rp->num_recs = 0; - ret_rp->first_key = NULL; - - } - - for (i = start_slice; i <= end_slice; i++) { - struct mdhim_stat *cur_stat; - gettimeofday(&rangehashstart, NULL); - HASH_FIND_INT(index->stats, &i, cur_stat); - gettimeofday(&rangehashend, NULL); - rangehashtime+=1000000*(rangehashend.tv_sec-rangehashstart.tv_sec)+rangehashend.tv_usec-rangehashstart.tv_usec; - - gettimeofday(&serhashstart, NULL); - ret_rp = get_range_server_by_slice(md, index, i); - gettimeofday(&serhashend, NULL); - 
serhashtime+=1000000*(serhashend.tv_sec-serhashstart.tv_sec)+serhashend.tv_usec-serhashstart.tv_usec; - if (!ret_rp) { - mlog(MDHIM_CLIENT_INFO, "Rank: %d - Did not get a valid range server from" - " get_range_server_by_size", - md->mdhim_rank); - return NULL; - } - - ret_rp->num_recs += cur_stat->num; - if (ret_rp->first_key == NULL) { - ret_rp->first_key = cur_stat->min; - } - else { - if (unifyfs_compare(ret_rp->first_key, cur_stat->min) > 0 ) { - ret_rp->first_key = cur_stat->min; - } - } - - _add_to_rangesrv_list(&rl, ret_rp); - } - - //Return the range server information - return rl; - -} diff --git a/meta/src/partitioner.h b/meta/src/partitioner.h deleted file mode 100644 index 330d65528..000000000 --- a/meta/src/partitioner.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#ifndef __HASH_H -#define __HASH_H - -#include "mdhim.h" -#include "uthash.h" -#include "indexes.h" - -/* Used to determine if a rank is a range server - Works like this: - if myrank % RANGE_SERVER_FACTOR == 0, then I'm a range server - if all the keys haven't been covered yet - - if the number of ranks is less than the RANGE_SERVER_FACTOR, - then the last rank will be the range server -*/ - -#ifdef __cplusplus -extern "C" -{ -#endif -//#define RANGE_SERVER_FACTOR 4 // NOW a global variable in partitioner.h -#define MDHIM_MAX_SLICES 2147483647 -//32 bit unsigned integer -#define MDHIM_INT_KEY 1 -#define MDHIM_LONG_INT_KEY 2 -#define MDHIM_FLOAT_KEY 3 -#define MDHIM_DOUBLE_KEY 4 -#define MDHIM_STRING_KEY 5 -//An arbitrary sized key -#define MDHIM_BYTE_KEY 6 -#define MDHIM_UNIFYFS_KEY 7 - -//Maximum length of a key -#define MAX_KEY_LEN 1048576 - -/* The exponent used for the algorithm that determines the range server - - This exponent, should cover the number of characters in our alphabet - if 2 is raised to that power. 
If the exponent is 6, then, 64 characters are covered -*/ -#define MDHIM_ALPHABET_EXPONENT 6 - -//Used for hashing strings to the appropriate range server -struct mdhim_char { - int id; /* we'll use this field as the key */ - int pos; - UT_hash_handle hh; /* makes this structure hashable */ -}; - -typedef struct rangesrv_list rangesrv_list; -struct rangesrv_list { - rangesrv_info *ri; - rangesrv_list *next; -}; - -void partitioner_init(); -void partitioner_release(); -rangesrv_list *get_range_servers(struct mdhim_t *md, struct index_t *index, - void *key, int key_len); -rangesrv_info *get_range_server_by_slice(struct mdhim_t *md, - struct index_t *index, int slice); -void build_alphabet(); -int verify_key(struct index_t *index, void *key, int key_len, int key_type); -long double get_str_num(void *key, uint32_t key_len); - //long double get_byte_num(void *key, uint32_t key_len); -uint64_t get_byte_num(void *key, uint32_t key_len); -int get_slice_num(struct mdhim_t *md, struct index_t *index, void *key, int key_len); -int is_float_key(int type); - -rangesrv_list *get_range_servers_from_stats(struct mdhim_t *md, struct index_t *index, - void *key, int key_len, int op); -rangesrv_list *get_range_servers_from_range(struct mdhim_t *md, struct index_t *index, - void *start_key, void *end_key, int key_len); - -void* copy_unifyfs_key(void* key, uint32_t key_len); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/meta/src/range_server.c b/meta/src/range_server.c deleted file mode 100644 index 460f1f3c3..000000000 --- a/meta/src/range_server.c +++ /dev/null @@ -1,1512 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. 
- */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ds_leveldb.h" -#include "mdhim.h" -#include "mdhim_options.h" -#include "partitioner.h" -#include "range_server.h" -#include "unifyfs_metadata_mdhim.h" -#include "uthash.h" - -int recv_counter = 0; - -struct timeval resp_put_comm_start, resp_put_comm_end; -double resp_put_comm_time = 0; - -struct timeval resp_get_comm_start, resp_get_comm_end; -double resp_get_comm_time = 0; -struct index_t *tmp_index; - -struct timeval worker_start, worker_end; -double worker_time=0; - -struct timeval worker_get_start, worker_get_end; -double worker_get_time=0; - -struct timeval worker_put_start, worker_put_end; -double worker_put_time=0; - -struct timeval stat_start, stat_end; -double stat_time=0; - -struct timeval odbgetstart, odbgetend; -double odbgettime=0; - -struct timeval bputstart, bputend; -double bputtime=0; - -struct timeval statstart, statend; -double starttime=0; - -int putflag = 1; - -int unifyfs_compare(const char* a, const char* b) { - int rc; - unifyfs_key_t *keya = (unifyfs_key_t *)a; - unifyfs_key_t *keyb = (unifyfs_key_t *)b; - rc = unifyfs_key_compare(keya, keyb); - return rc; -} - -void add_timing(struct timeval start, struct timeval end, 
int num, - struct mdhim_t *md, int mtype) { - long double elapsed; - - elapsed = (long double) (end.tv_sec - start.tv_sec) + - ((long double) (end.tv_usec - start.tv_usec)/1000000.0); - if (mtype == MDHIM_PUT || mtype == MDHIM_BULK_PUT) { - md->mdhim_rs->put_time += elapsed; - md->mdhim_rs->num_put += num; - } else if (mtype == MDHIM_BULK_GET) { - md->mdhim_rs->get_time += elapsed; - md->mdhim_rs->num_get += num; - } -} - -/** - * send_locally_or_remote - * Sends the message remotely or locally - * - * @param md Pointer to the main MDHIM structure - * @param dest Destination rank - * @param message pointer to message to send - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int send_locally_or_remote(struct mdhim_t *md, int dest, void *message) { - int ret = MDHIM_SUCCESS; - MPI_Request **size_req, **msg_req; - int *sizebuf; - void **sendbuf; - - if (md->mdhim_rank != dest) { - //Sends the message remotely - size_req = malloc(sizeof(MPI_Request *)); - msg_req = malloc(sizeof(MPI_Request *)); - sendbuf = malloc(sizeof(void *)); - sizebuf = malloc(sizeof(int)); - ret = send_client_response(md, dest, message, sizebuf, - sendbuf, size_req, msg_req); - - if (*size_req) { - range_server_add_oreq(md, *size_req, sizebuf); - } else { - free(sizebuf); - } - - if (*msg_req) { - range_server_add_oreq(md, *msg_req, *sendbuf); - } else if (*sendbuf) { - free(*sendbuf); - } - - free(sendbuf); - mdhim_full_release_msg(message); - free(size_req); - free(msg_req); - } else { - //Sends the message locally - pthread_mutex_lock(md->receive_msg_mutex); - md->receive_msg = message; - pthread_mutex_unlock(md->receive_msg_mutex); - pthread_cond_signal(md->receive_msg_ready_cv); - } - - return ret; -} - -struct index_t *find_index(struct mdhim_t *md, struct mdhim_basem_t *msg) { - struct index_t *ret; - - ret = get_index(md, msg->index); - - return ret; - -} - - -/* - * === FUNCTION ====================================================================== - * Name: find_index_by_name - * 
Description: Search for index by name - * Variables: the pointer to the mdhim structure - * A pointer to a base message that contains - * the name of the index - * ===================================================================================== - */ -struct index_t * find_index_by_name(struct mdhim_t *md, struct mdhim_basem_t *msg) { - struct index_t *ret; - - ret = get_index_by_name(md, msg->index_name); - - return ret; -} - -/** - * range_server_add_work - * Adds work to the work queue and signals the condition variable for the worker thread - * - * @param md Pointer to the main MDHIM structure - * @param item pointer to new work item that contains a message to handle - * @return MDHIM_SUCCESS - */ -int range_server_add_work(struct mdhim_t *md, work_item *item) { - //Lock the work queue mutex - pthread_mutex_lock(md->mdhim_rs->work_queue_mutex); - item->next = NULL; - item->prev = NULL; - - //Add work to the tail of the work queue - if (md->mdhim_rs->work_queue->tail) { - md->mdhim_rs->work_queue->tail->next = item; - item->prev = md->mdhim_rs->work_queue->tail; - md->mdhim_rs->work_queue->tail = item; - } else { - md->mdhim_rs->work_queue->head = item; - md->mdhim_rs->work_queue->tail = item; - } - - //Signal the waiting thread that there is work available - pthread_mutex_unlock(md->mdhim_rs->work_queue_mutex); - pthread_cond_signal(md->mdhim_rs->work_ready_cv); - - return MDHIM_SUCCESS; -} - -/** - * get_work - * Returns the next work item from the work queue - * - * @param md Pointer to the main MDHIM structure - * @return the next work_item to process - */ - -work_item *get_work(struct mdhim_t *md) { - work_item *item; - - item = md->mdhim_rs->work_queue->head; - if (!item) { - return NULL; - } - - //Set the list head and tail to NULL - md->mdhim_rs->work_queue->head = NULL; - md->mdhim_rs->work_queue->tail = NULL; - - //Return the list - return item; -} - -/** - * range_server_stop - * Stop the range server (i.e., stops the threads and frees the 
relevant data in md) - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_stop(struct mdhim_t *md) { - int i, ret; - work_item *head, *temp_item; - - //Signal to the listener thread that it needs to shutdown - md->shutdown = 1; - - /* Wait for the threads to finish */ - pthread_cond_broadcast(md->mdhim_rs->work_ready_cv); - pthread_join(md->mdhim_rs->listener, NULL); - /* Wait for the threads to finish */ - for (i = 0; i < md->db_opts->num_wthreads; i++) { - pthread_join(*md->mdhim_rs->workers[i], NULL); - free(md->mdhim_rs->workers[i]); - } - free(md->mdhim_rs->workers); - - //Destroy the condition variables - if ((ret = pthread_cond_destroy(md->mdhim_rs->work_ready_cv)) != 0) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error destroying work cond variable", - md->mdhim_rank); - } - free(md->mdhim_rs->work_ready_cv); - - //Destroy the work queue mutex - if ((ret = pthread_mutex_destroy(md->mdhim_rs->work_queue_mutex)) != 0) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error destroying work queue mutex", - md->mdhim_rank); - } - free(md->mdhim_rs->work_queue_mutex); - - //Clean outstanding sends - range_server_clean_oreqs(md); - //Destroy the out req mutex - if ((ret = pthread_mutex_destroy(md->mdhim_rs->out_req_mutex)) != 0) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error destroying work queue mutex", - md->mdhim_rank); - } - free(md->mdhim_rs->out_req_mutex); - - //Free the work queue - head = md->mdhim_rs->work_queue->head; - while (head) { - temp_item = head->next; - free(head); - head = temp_item; - } - free(md->mdhim_rs->work_queue); - - mlog(MDHIM_SERVER_INFO, "Rank: %d - Inserted: %ld records in %Lf seconds", - md->mdhim_rank, md->mdhim_rs->num_put, md->mdhim_rs->put_time); - mlog(MDHIM_SERVER_INFO, "Rank: %d - Retrieved: %ld records in %Lf seconds", - md->mdhim_rank, md->mdhim_rs->num_get, md->mdhim_rs->get_time); - - //Free the range server data - free(md->mdhim_rs); - md->mdhim_rs = NULL; - - 
return MDHIM_SUCCESS; -} - -/** - * range_server_put - * Handles the put message and puts data in the database - * - * @param md pointer to the main MDHIM struct - * @param im pointer to the put message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_put(struct mdhim_t *md, struct mdhim_putm_t *im, int source) { - int ret; - struct mdhim_rm_t *rm; - int error = 0; - void **value; - int32_t *value_len; - int exists = 0; - void *new_value; - int32_t new_value_len; - void *old_value; - int32_t old_value_len; - struct timeval start, end; - int inserted = 0; - struct index_t *index; - - value = malloc(sizeof(void *)); - *value = NULL; - value_len = malloc(sizeof(int32_t)); - *value_len = 0; - - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) im); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, im->basem.index); - error = MDHIM_ERROR; - goto done; - } - - gettimeofday(&start, NULL); - //Check for the key's existence -/* index->mdhim_store->get(index->mdhim_store->db_handle, - im->key, im->key_len, value, - value_len); -*/ - //The key already exists - if (*value && *value_len) { - exists = 1; - } - - //If the option to append was specified and there is old data, concat the old and new - if (exists && md->db_opts->db_value_append == MDHIM_DB_APPEND) { - old_value = *value; - old_value_len = *value_len; - new_value_len = old_value_len + im->value_len; - new_value = malloc(new_value_len); - memcpy(new_value, old_value, old_value_len); - memcpy(new_value + old_value_len, im->value, im->value_len); - } else { - new_value = im->value; - new_value_len = im->value_len; - } - - if (*value && *value_len) { - free(*value); - } - free(value); - free(value_len); - //Put the record in the database - if ((ret = - index->mdhim_store->put(index->mdhim_store->db_handle, - im->key, im->key_len, new_value, - 
new_value_len)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error putting record", - md->mdhim_rank); - error = ret; - } else { - inserted = 1; - } - - if (!exists && error == MDHIM_SUCCESS) { - gettimeofday(&stat_start, NULL); - update_stat(md, index, im->key, im->key_len); - gettimeofday(&stat_end, NULL); - stat_time += 1000000 * (stat_end.tv_sec - stat_start.tv_sec) + \ - stat_end.tv_usec - stat_start.tv_usec; - } - - gettimeofday(&end, NULL); - add_timing(start, end, inserted, md, MDHIM_PUT); - -done: - //Create the response message - rm = malloc(sizeof(struct mdhim_rm_t)); - //Set the type - rm->basem.mtype = MDHIM_RECV; - //Set the operation return code as the error - rm->error = error; - //Set the server's rank - rm->basem.server_rank = md->mdhim_rank; - - //Send response - ret = send_locally_or_remote(md, source, rm); - - //Free memory - if (exists && md->db_opts->db_value_append == MDHIM_DB_APPEND) { - free(new_value); - } - if (source != md->mdhim_rank) { - free(im->key); - free(im->value); - } - free(im); - return MDHIM_SUCCESS; -} - - -/** - * range_server_bput - * Handles the bulk put message and puts data in the database - * - * @param md Pointer to the main MDHIM struct - * @param bim pointer to the bulk put message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_bput(struct mdhim_t *md, struct mdhim_bputm_t *bim, int source) { - putflag = 1; - int i; - int ret; - int error = MDHIM_SUCCESS; - struct mdhim_rm_t *brm; - void **value; - int32_t *value_len; - int *exists; - void *new_value; - int32_t new_value_len; - void **new_values; - int32_t *new_value_lens; - void *old_value; - int32_t old_value_len; - struct timeval start, end; - int num_put = 0; - struct index_t *index; - - gettimeofday(&start, NULL); - gettimeofday(&bputstart, NULL); - exists = malloc(bim->num_keys * sizeof(int)); - new_values = malloc(bim->num_keys * sizeof(void *)); - new_value_lens = 
malloc(bim->num_keys * sizeof(int)); - value = malloc(sizeof(void *)); - value_len = malloc(sizeof(int32_t)); - - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) bim); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, bim->basem.index); - error = MDHIM_ERROR; - goto done; - } - gettimeofday(&bputend, NULL); - bputtime+=1000000 * (bputend.tv_sec - bputstart.tv_sec)\ - + bputend.tv_usec - bputstart.tv_usec; - - for (i = 0; i < bim->num_keys && i < MAX_BULK_OPS; i++) { - *value = NULL; - *value_len = 0; - - gettimeofday(&odbgetstart, NULL); -/* index->mdhim_store->get(index->mdhim_store->db_handle, - bim->keys[i], bim->key_lens[i], value, - value_len); -*/ - if (*value && *value_len) { - exists[i] = 1; - } else { - exists[i] = 0; - } - - exists[i] = 0; - if (exists[i] && md->db_opts->db_value_append == MDHIM_DB_APPEND) { - old_value = *value; - old_value_len = *value_len; - new_value_len = old_value_len + bim->value_lens[i]; - new_value = malloc(new_value_len); - memcpy(new_value, old_value, old_value_len); - memcpy(new_value + old_value_len, bim->values[i], bim->value_lens[i]); - if (exists[i] && source != md->mdhim_rank) { - free(bim->values[i]); - } - - new_values[i] = new_value; - new_value_lens[i] = new_value_len; - } else { - new_values[i] = bim->values[i]; - new_value_lens[i] = bim->value_lens[i]; - - } - - if (*value) { - free(*value); - } - gettimeofday(&odbgetend, NULL); - odbgettime+=1000000 * (odbgetend.tv_sec\ - - odbgetstart.tv_sec) + odbgetend.tv_usec - odbgetstart.tv_usec; - } - - //Put the record in the database - if ((ret = - index->mdhim_store->batch_put(index->mdhim_store->db_handle, - bim->keys, bim->key_lens, new_values, - new_value_lens, bim->num_keys)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error batch putting records", - md->mdhim_rank); - error = ret; - } else { - num_put = bim->num_keys; - } - - gettimeofday(&stat_start, 
NULL); - for (i = 0; i < bim->num_keys && i < MAX_BULK_OPS; i++) { - //Update the stats if this key didn't exist before - if (!exists[i] && error == MDHIM_SUCCESS) { - update_stat(md, index, bim->keys[i], bim->key_lens[i]); - } - - if (exists[i] && md->db_opts->db_value_append == MDHIM_DB_APPEND) { - //Release the value created for appending the new and old value - free(new_values[i]); - } - - //Release the bput keys/value if the message isn't coming from myself - if (source != md->mdhim_rank) { - free(bim->keys[i]); - free(bim->values[i]); - } - } - gettimeofday(&stat_end, NULL); - stat_time += 1000000 * (stat_end.tv_sec - stat_start.tv_sec) + \ - stat_end.tv_usec - stat_start.tv_usec; - - free(exists); - free(new_values); - free(new_value_lens); - free(value); - free(value_len); - gettimeofday(&end, NULL); - add_timing(start, end, num_put, md, MDHIM_BULK_PUT); - - done: - //Create the response message - brm = malloc(sizeof(struct mdhim_rm_t)); - //Set the type - brm->basem.mtype = MDHIM_RECV; - //Set the operation return code as the error - brm->error = error; - //Set the server's rank - brm->basem.server_rank = md->mdhim_rank; - - //Release the internals of the bput message - free(bim->keys); - free(bim->key_lens); - free(bim->values); - free(bim->value_lens); - free(bim); - - //Send response - gettimeofday(&resp_put_comm_start, NULL); - ret = send_locally_or_remote(md, source, brm); - - return MDHIM_SUCCESS; -} - -/** - * range_server_del - * Handles the delete message and deletes the data from the database - * - * @param md Pointer to the main MDHIM struct - * @param dm pointer to the delete message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_del(struct mdhim_t *md, struct mdhim_delm_t *dm, int source) { - int ret = MDHIM_ERROR; - struct mdhim_rm_t *rm; - struct index_t *index; - - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) dm); - if 
(!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, dm->basem.index); - ret = MDHIM_ERROR; - goto done; - } - - //Put the record in the database - if ((ret = - index->mdhim_store->del(index->mdhim_store->db_handle, - dm->key, dm->key_len)) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error deleting record", - md->mdhim_rank); - } - - done: - //Create the response message - rm = malloc(sizeof(struct mdhim_rm_t)); - //Set the type - rm->basem.mtype = MDHIM_RECV; - //Set the operation return code as the error - rm->error = ret; - //Set the server's rank - rm->basem.server_rank = md->mdhim_rank; - - //Send response - ret = send_locally_or_remote(md, source, rm); - free(dm); - - return MDHIM_SUCCESS; -} - -/** - * range_server_bdel - * Handles the bulk delete message and deletes the data from the database - * - * @param md Pointer to the main MDHIM struct - * @param bdm pointer to the bulk delete message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_bdel(struct mdhim_t *md, struct mdhim_bdelm_t *bdm, int source) { - int i; - int ret; - int error = 0; - struct mdhim_rm_t *brm; - struct index_t *index; - - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) bdm); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, bdm->basem.index); - error = MDHIM_ERROR; - goto done; - } - - //Iterate through the arrays and delete each record - for (i = 0; i < bdm->num_keys && i < MAX_BULK_OPS; i++) { - //Put the record in the database - if ((ret = - index->mdhim_store->del(index->mdhim_store->db_handle, - bdm->keys[i], bdm->key_lens[i])) - != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error deleting record", - md->mdhim_rank); - error = ret; - } - } - -done: - //Create the response message - brm = malloc(sizeof(struct mdhim_rm_t)); - //Set the 
type - brm->basem.mtype = MDHIM_RECV; - //Set the operation return code as the error - brm->error = error; - //Set the server's rank - brm->basem.server_rank = md->mdhim_rank; - - //Send response - ret = send_locally_or_remote(md, source, brm); - free(bdm->keys); - free(bdm->key_lens); - free(bdm); - - return MDHIM_SUCCESS; -} - -/** - * range_server_commit - * Handles the commit message and commits outstanding writes to the database - * - * @param md pointer to the main MDHIM struct - * @param im pointer to the commit message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_commit(struct mdhim_t *md, struct mdhim_basem_t *im, int source) { - int ret; - struct mdhim_rm_t *rm; - struct index_t *index; - - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) im); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, im->index); - ret = MDHIM_ERROR; - goto done; - } - - //Put the record in the database - if ((ret = - index->mdhim_store->commit(index->mdhim_store->db_handle)) - != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error committing database", - md->mdhim_rank); - } - - done: - //Create the response message - rm = malloc(sizeof(struct mdhim_rm_t)); - //Set the type - rm->basem.mtype = MDHIM_RECV; - //Set the operation return code as the error - rm->error = ret; - //Set the server's rank - rm->basem.server_rank = md->mdhim_rank; - - //Send response - ret = send_locally_or_remote(md, source, rm); - free(im); - - return MDHIM_SUCCESS; -} - -/** - * range_server_bget - * Handles the bulk get message, retrieves the data from the database, and sends the results back - * - * @param md Pointer to the main MDHIM struct - * @param bgm pointer to the bulk get message to handle - * @param source source of the message - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_bget(struct 
mdhim_t *md, struct mdhim_bgetm_t *bgm, int source) { - putflag = 0; - int ret; - void **values = NULL; - int32_t *value_lens = NULL; - int i; - struct mdhim_bgetrm_t *bgrm; - int error = 0; - struct timeval start, end; - int num_retrieved = 0; - struct index_t *index; - - gettimeofday(&start, NULL); - if (bgm->op != MDHIM_RANGE_BGET) { - values = (void **) calloc(bgm->num_keys, sizeof(void *)); - value_lens = (int32_t *) calloc(bgm->num_keys, sizeof(int32_t)); - } - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) bgm); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, bgm->basem.index); - error = MDHIM_ERROR; - goto done; - } - - if (bgm->op == MDHIM_RANGE_BGET) { - void **ret_keys; - int32_t *ret_key_lens; - int num_ranges = bgm->num_keys / 2; - int out_record_cnt = 0; - leveldb_batch_ranges(index->mdhim_store->db_handle, - (char **)bgm->keys, bgm->key_lens, - (char ***)&ret_keys, &ret_key_lens, - (char ***)&values, &value_lens, - num_ranges, &out_record_cnt); - - if (source != md->mdhim_rank) { - for (i = 0; i < bgm->num_keys; i++) { - free(bgm->keys[i]); - } - } - free(bgm->key_lens); - free(bgm->keys); - - bgm->keys = ret_keys; - bgm->num_keys = out_record_cnt; - bgm->key_lens = ret_key_lens; - - } else { - for (i = 0; i < bgm->num_keys && i < MAX_BULK_OPS; i++) { - switch(bgm->op) { - // Gets the value for the given key - case MDHIM_GET_EQ: - //Get records from the database - - if ((ret = - index->mdhim_store->get(index->mdhim_store->db_handle, - bgm->keys[i], bgm->key_lens[i], &values[i], - &value_lens[i])) != MDHIM_SUCCESS) { - error = ret; - value_lens[i] = 0; - values[i] = NULL; - continue; - } - - break; - /* Gets the next key and value that is in order after the passed in key */ - case MDHIM_GET_NEXT: - if ((ret = - index->mdhim_store->get_next(index->mdhim_store->db_handle, - &bgm->keys[i], &bgm->key_lens[i], &values[i], - &value_lens[i])) != 
MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error getting record", md->mdhim_rank); - error = ret; - value_lens[i] = 0; - values[i] = NULL; - continue; - } - - break; - /* Gets the previous key and value that is in order before the passed in key - or the last key if no key was passed in */ - case MDHIM_GET_PREV: - if ((ret = - index->mdhim_store->get_prev(index->mdhim_store->db_handle, - &bgm->keys[i], &bgm->key_lens[i], &values[i], - &value_lens[i])) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error getting record", md->mdhim_rank); - error = ret; - value_lens[i] = 0; - values[i] = NULL; - continue; - } - - break; - /* Gets the first key/value */ - case MDHIM_GET_FIRST: - if ((ret = - index->mdhim_store->get_next(index->mdhim_store->db_handle, - &bgm->keys[i], 0, &values[i], - &value_lens[i])) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error getting record", md->mdhim_rank); - error = ret; - value_lens[i] = 0; - values[i] = NULL; - continue; - } - - break; - /* Gets the last key/value */ - case MDHIM_GET_LAST: - if ((ret = - index->mdhim_store->get_prev(index->mdhim_store->db_handle, - &bgm->keys[i], 0, &values[i], - &value_lens[i])) != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Error getting record", md->mdhim_rank); - error = ret; - value_lens[i] = 0; - values[i] = NULL; - continue; - } - - break; - default: - mlog(MDHIM_SERVER_DBG, "Rank: %d - Invalid operation: %d given in range_server_get", - md->mdhim_rank, bgm->op); - continue; - } - - num_retrieved++; - } - } - gettimeofday(&end, NULL); - add_timing(start, end, num_retrieved, md, MDHIM_BULK_GET); - -done: - //Create the response message - bgrm = malloc(sizeof(struct mdhim_bgetrm_t)); - //Set the type - bgrm->basem.mtype = MDHIM_RECV_BULK_GET; - //Set the operation return code as the error - bgrm->error = error; - //Set the server's rank - bgrm->basem.server_rank = md->mdhim_rank; - //Set the key and value - if (source == md->mdhim_rank) { - //If this message 
is coming from myself, copy the keys - bgrm->key_lens = malloc(bgm->num_keys * sizeof(int)); - bgrm->keys = malloc(bgm->num_keys * sizeof(void *)); - for (i = 0; i < bgm->num_keys; i++) { - bgrm->key_lens[i] = bgm->key_lens[i]; - bgrm->keys[i] = malloc(bgrm->key_lens[i]); - memcpy(bgrm->keys[i], bgm->keys[i], bgrm->key_lens[i]); - } - - free(bgm->keys); - free(bgm->key_lens); - } else { - bgrm->keys = bgm->keys; - bgrm->key_lens = bgm->key_lens; - } - - bgrm->values = values; - bgrm->value_lens = value_lens; - bgrm->num_keys = bgm->num_keys; - bgrm->basem.index = index->id; - bgrm->basem.index_type = index->type; - - //Send response - gettimeofday(&resp_get_comm_start, NULL); - ret = send_locally_or_remote(md, source, bgrm); - - //Release the bget message - free(bgm); - - return MDHIM_SUCCESS; -} - -/** - * range_server_bget_op - * Handles the get message given an op and number of records greater than 1 - * - * @param md Pointer to the main MDHIM struct - * @param gm pointer to the get message to handle - * @param source source of the message - * @param op operation to perform - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_bget_op(struct mdhim_t *md, struct mdhim_bgetm_t *bgm, int source, int op) { - putflag = 0; - int error = 0; - void **values; - void **keys; - void **get_key; //Used for passing the key to the db - int *get_key_len; //Used for passing the key len to the db - void **get_value; - int *get_value_len; - int32_t *key_lens; - int32_t *value_lens; - struct mdhim_bgetrm_t *bgrm; - int ret; - int i, j; - int num_records; - struct timeval start, end; - struct index_t *index; - - //Initialize pointers and lengths - values = malloc(sizeof(void *) * bgm->num_keys * bgm->num_recs); - value_lens = malloc(sizeof(int32_t) * bgm->num_keys * bgm->num_recs); - memset(value_lens, 0, sizeof(int32_t) *bgm->num_keys * bgm->num_recs); - keys = malloc(sizeof(void *) * bgm->num_keys * bgm->num_recs); - memset(keys, 0, sizeof(void *) * 
bgm->num_keys * bgm->num_recs); - key_lens = malloc(sizeof(int32_t) * bgm->num_keys * bgm->num_recs); - memset(key_lens, 0, sizeof(int32_t) * bgm->num_keys * bgm->num_recs); - get_key = malloc(sizeof(void *)); - *get_key = NULL; - get_key_len = malloc(sizeof(int32_t)); - *get_key_len = 0; - get_value = malloc(sizeof(void *)); - get_value_len = malloc(sizeof(int32_t)); - num_records = 0; - /* - printf("range server bget op\n"); - fflush(stdout); - */ - //Get the index referenced the message - index = find_index(md, (struct mdhim_basem_t *) bgm); - if (!index) { - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Error retrieving index for id: %d", - md->mdhim_rank, bgm->basem.index); - error = MDHIM_ERROR; - goto respond; - } - - if (bgm->num_keys * bgm->num_recs > MAX_BULK_OPS) { - /* - printf("in range server, total bulk key%ld, recs %ld\n", bgm->num_keys, bgm->num_recs); - fflush(stdout); - */ - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Too many bulk operations requested", - md->mdhim_rank); - error = MDHIM_ERROR; - goto respond; - } - -// mlog(MDHIM_SERVER_CRIT, "Rank: %d - Num keys is: %d and num recs is: %d", -// md->mdhim_rank, bgm->num_keys, bgm->num_recs); - - gettimeofday(&start, NULL); - //Iterate through the arrays and get each record - if (op != MDHIM_GET_NEXT) { - for (i = 0; i < bgm->num_keys; i++) { - for (j = 0; j < bgm->num_recs; j++) { - keys[num_records] = NULL; - key_lens[num_records] = 0; - - //If we were passed in a key, copy it - if (!j && bgm->key_lens[i] && bgm->keys[i]) { - *get_key = malloc(bgm->key_lens[i]); - memcpy(*get_key, bgm->keys[i], bgm->key_lens[i]); - *get_key_len = bgm->key_lens[i]; - //If we were not passed a key and this is a next/prev, then return an error - } else if (!j && (!bgm->key_lens[i] || !bgm->keys[i]) - && (op == MDHIM_GET_NEXT || - op == MDHIM_GET_PREV)) { - error = MDHIM_ERROR; - goto respond; - } - - switch(op) { - //Get a record from the database - case MDHIM_GET_FIRST: - if (j == 0) { - keys[num_records] = NULL; - 
key_lens[num_records] = sizeof(int32_t); - } - case MDHIM_GET_NEXT: - - if (j && (ret = - index->mdhim_store->get_next(index->mdhim_store->db_handle, - get_key, get_key_len, - get_value, - get_value_len)) - != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Couldn't get next record", - md->mdhim_rank); - error = ret; - - key_lens[num_records] = 0; - value_lens[num_records] = 0; - goto respond; - } else if (!j && (ret = - index->mdhim_store->get(index->mdhim_store->db_handle, - *get_key, *get_key_len, - get_value, - get_value_len)) - != MDHIM_SUCCESS) { - if ((ret = index->mdhim_store->get_next(index->mdhim_store->db_handle,\ - get_key, get_key_len, get_value, \ - get_value_len)) != MDHIM_SUCCESS) { - - key_lens[num_records] = 0; - value_lens[num_records] = 0; - goto respond; - } - } - - break; - case MDHIM_GET_LAST: - if (j == 0) { - keys[num_records] = NULL; - key_lens[num_records] = sizeof(int32_t); - } - case MDHIM_GET_PREV: - if (j && (ret = - index->mdhim_store->get_prev(index->mdhim_store->db_handle, - get_key, get_key_len, - get_value, - get_value_len)) - != MDHIM_SUCCESS) { - mlog(MDHIM_SERVER_DBG, "Rank: %d - Couldn't get prev record", - md->mdhim_rank); - error = ret; - key_lens[num_records] = 0; - value_lens[num_records] = 0; - goto respond; - } else if (!j && (ret = - index->mdhim_store->get(index->mdhim_store->db_handle, - *get_key, *get_key_len, - get_value, - get_value_len)) - != MDHIM_SUCCESS) { - error = ret; - key_lens[num_records] = 0; - value_lens[num_records] = 0; - goto respond; - } - break; - default: - mlog(MDHIM_SERVER_CRIT, "Rank: %d - Invalid operation for bulk get op", - md->mdhim_rank); - goto respond; - break; - } - - keys[num_records] = *get_key; - key_lens[num_records] = *get_key_len; - values[num_records] = *get_value; - value_lens[num_records] = *get_value_len; - num_records++; - } - } - } - else { - for (i = 0; i < bgm->num_keys; i++) { - for (j = 0; j < bgm->num_recs; j++) { - keys[i*bgm->num_recs+j] = NULL; - 
key_lens[i*bgm->num_recs+j] = 0; - } - } - - num_records = 0; - *get_key = malloc(bgm->key_lens[0]); - memcpy(*get_key, bgm->keys[0], bgm->key_lens[0]); - keys[0] = *get_key; - *get_key_len = bgm->key_lens[0]; - key_lens[0] = *get_key_len; - - error = mdhim_leveldb_batch_next(index->mdhim_store->db_handle, - (char **)keys, key_lens, - (char **)values, value_lens, - bgm->num_keys * bgm->num_recs, - &num_records); - - } - -respond: - - gettimeofday(&end, NULL); - add_timing(start, end, num_records, md, MDHIM_BULK_GET); - - //Create the response message - bgrm = malloc(sizeof(struct mdhim_bgetrm_t)); - //Set the type - bgrm->basem.mtype = MDHIM_RECV_BULK_GET; - //Set the operation return code as the error - bgrm->error = error; - //Set the server's rank - bgrm->basem.server_rank = md->mdhim_rank; - //Set the keys and values - bgrm->keys = keys; - bgrm->key_lens = key_lens; - bgrm->values = values; - bgrm->value_lens = value_lens; - bgrm->num_keys = num_records; - bgrm->basem.index = index->id; - bgrm->basem.index_type = index->type; - - //Send response - gettimeofday(&resp_get_comm_start, NULL); - ret = send_locally_or_remote(md, source, bgrm); - //Free stuff - if (source == md->mdhim_rank) { - /* If this message is not coming from myself, - free the keys and values from the get message */ - mdhim_partial_release_msg(bgm); - } - - free(get_key); - free(get_key_len); - free(get_value); - free(get_value_len); - - return MDHIM_SUCCESS; -} - -/* - * listener_thread - * Function for the thread that listens for new messages - */ -void *listener_thread(void *data) { - //Mlog statements could cause a deadlock on range_server_stop due to canceling of threads - - - struct mdhim_t *md = (struct mdhim_t *) data; - void *message; - int source; //The source of the message - int ret; - work_item *item; - - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - - while (1) { - if (md->shutdown) { - break; - } - - //Clean 
outstanding sends - range_server_clean_oreqs(md); - - //Receive messages sent to this server - ret = receive_rangesrv_work(md, &source, &message); - if (ret < MDHIM_SUCCESS) { - continue; - } - //printf("Rank: %d - Received message from rank: %d of type: %d", - // md->mdhim_rank, source, mtype); - recv_counter++; - //Create a new work item - item = malloc(sizeof(work_item)); - memset(item, 0, sizeof(work_item)); - - //Set the new buffer to the new item's message - item->message = message; - //Set the source in the work item - item->source = source; - //Add the new item to the work queue - range_server_add_work(md, item); - } - - return NULL; -} - -/* - * worker_thread - * Function for the thread that processes work in work queue - */ -void *worker_thread(void *data) { - //Mlog statements could cause a deadlock on range_server_stop due to canceling of threads - struct mdhim_t *md = (struct mdhim_t *) data; - work_item *item, *item_tmp; - int mtype; - int op, num_records, num_keys; - - while (1) { - if (md->shutdown) { - break; - } - //Lock the work queue mutex - pthread_mutex_lock(md->mdhim_rs->work_queue_mutex); - pthread_cleanup_push((void (*)(void *)) pthread_mutex_unlock, - (void *) md->mdhim_rs->work_queue_mutex); - - //Wait until there is work to be performed - if ((item = get_work(md)) == NULL) { - pthread_cond_wait(md->mdhim_rs->work_ready_cv, md->mdhim_rs->work_queue_mutex); - item = get_work(md); - } - - pthread_cleanup_pop(0); - if (!item) { - pthread_mutex_unlock(md->mdhim_rs->work_queue_mutex); - continue; - } - pthread_mutex_unlock(md->mdhim_rs->work_queue_mutex); - - //Clean outstanding sends - range_server_clean_oreqs(md); - - gettimeofday(&worker_start, NULL); - while (item) { - //Call the appropriate function depending on the message type - //Get the message type - mtype = ((struct mdhim_basem_t *) item->message)->mtype; - - switch(mtype) { - case MDHIM_PUT: - //Pack the put message and pass to range_server_put - range_server_put(md, - 
item->message, - item->source); - break; - case MDHIM_BULK_PUT: - //Pack the bulk put message and pass to range_server_put - gettimeofday(&worker_put_start, NULL); - range_server_bput(md, - item->message, - item->source); - gettimeofday(&worker_put_end, NULL); - worker_put_time += 1000000*(worker_put_end.tv_sec-worker_put_start.tv_sec)+worker_put_end.tv_usec-worker_put_start.tv_usec; - break; - case MDHIM_BULK_GET: - gettimeofday(&worker_get_start, NULL); - op = ((struct mdhim_bgetm_t *) item->message)->op; - num_records = ((struct mdhim_bgetm_t *) item->message)->num_recs; - num_keys = ((struct mdhim_bgetm_t *) item->message)->num_keys; - //The client is sending one key, but requesting the retrieval of more than one - if (num_records > 1 && num_keys == 1) { - range_server_bget_op(md, - item->message, - item->source, op); - } else { - range_server_bget(md, - item->message, - item->source); - } - - gettimeofday(&worker_get_end, NULL); - worker_get_time += 1000000*(worker_get_end.tv_sec-worker_get_start.tv_sec)+worker_get_end.tv_usec-worker_get_start.tv_usec; - break; - case MDHIM_DEL: - range_server_del(md, item->message, item->source); - break; - case MDHIM_BULK_DEL: - range_server_bdel(md, item->message, item->source); - break; - case MDHIM_COMMIT: - range_server_commit(md, item->message, item->source); - break; - default: - printf("Rank: %d - Got unknown work type: %d" - " from: %d\n", md->mdhim_rank, mtype, item->source); - break; - } - - item_tmp = item; - item = item->next; - free(item_tmp); - } - - //Clean outstanding sends - range_server_clean_oreqs(md); - if (putflag == 0) { - gettimeofday(&worker_end, NULL); - gettimeofday(&resp_get_comm_end, NULL); - resp_get_comm_time+=1000000*(resp_get_comm_end.tv_sec\ - -resp_get_comm_start.tv_sec)+resp_get_comm_end.tv_usec\ - -resp_get_comm_start.tv_usec; - } - else { - gettimeofday(&resp_put_comm_end, NULL); - resp_put_comm_time+=1000000*(resp_put_comm_end.tv_sec\ - 
-resp_put_comm_start.tv_sec)+resp_put_comm_end.tv_usec\ - -resp_put_comm_start.tv_usec; - } - worker_time += 1000000*(worker_end.tv_sec-worker_start.tv_sec)+worker_end.tv_usec-worker_start.tv_usec; - } - return NULL; -} - -int range_server_add_oreq(struct mdhim_t *md, MPI_Request *req, void *msg) { - out_req *oreq; - out_req *item; - - pthread_mutex_lock(md->mdhim_rs->out_req_mutex); - item = md->mdhim_rs->out_req_list; - oreq = malloc(sizeof(out_req)); - oreq->next = NULL; - oreq->prev = NULL; - oreq->message = msg; - oreq->req = req; - - if (!item) { - md->mdhim_rs->out_req_list = oreq; - pthread_mutex_unlock(md->mdhim_rs->out_req_mutex); - return MDHIM_SUCCESS; - } - - item->prev = oreq; - oreq->next = item; - md->mdhim_rs->out_req_list = oreq; - pthread_mutex_unlock(md->mdhim_rs->out_req_mutex); - - return MDHIM_SUCCESS; -} - -int range_server_clean_oreqs(struct mdhim_t *md) { - out_req *item; - out_req *t; - int ret = MDHIM_SUCCESS; - int flag = 0; - MPI_Status status; - - pthread_mutex_lock(md->mdhim_rs->out_req_mutex); - item = md->mdhim_rs->out_req_list; - while (item) { - if (!item->req) { - item = item->next; - continue; - } - - pthread_mutex_lock(md->mdhim_comm_lock); - ret = MPI_Test((MPI_Request *)item->req, &flag, &status); - pthread_mutex_unlock(md->mdhim_comm_lock); - - if (ret != MPI_SUCCESS) { - ret = MDHIM_ERROR; - break; - } - - if (!flag) { - item = item->next; - continue; - } - - if (item == md->mdhim_rs->out_req_list) { - md->mdhim_rs->out_req_list = item->next; - if (item->next) { - item->next->prev = NULL; - } - } else { - if (item->next) { - item->next->prev = item->prev; - } - if (item->prev) { - item->prev->next = item->next; - } - } - - t = item->next; - free(item->req); - if (item->message) { - free(item->message); - } - - free(item); - item = t; - } - - pthread_mutex_unlock(md->mdhim_rs->out_req_mutex); - - return ret; -} - -/** - * range_server_init - * Initializes the range server (i.e., starts the threads and populates the relevant 
data in md) - * - * @param md Pointer to the main MDHIM structure - * @return MDHIM_SUCCESS or MDHIM_ERROR on error - */ -int range_server_init(struct mdhim_t *md) { - int ret; - int i; - - //Allocate memory for the mdhim_rs_t struct - md->mdhim_rs = malloc(sizeof(struct mdhim_rs_t)); - if (!md->mdhim_rs) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for range server", - md->mdhim_rank); - return MDHIM_ERROR; - } - - //Initialize variables for printing out timings - md->mdhim_rs->put_time = 0; - md->mdhim_rs->get_time = 0; - md->mdhim_rs->num_put = 0; - md->mdhim_rs->num_get = 0; - //Initialize work queue - md->mdhim_rs->work_queue = malloc(sizeof(work_queue_t)); - md->mdhim_rs->work_queue->head = NULL; - md->mdhim_rs->work_queue->tail = NULL; - - //Initialize the outstanding request list - md->mdhim_rs->out_req_list = NULL; - - //Initialize work queue mutex - md->mdhim_rs->work_queue_mutex = malloc(sizeof(pthread_mutex_t)); - if (!md->mdhim_rs->work_queue_mutex) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for range server", - md->mdhim_rank); - return MDHIM_ERROR; - } - if ((ret = pthread_mutex_init(md->mdhim_rs->work_queue_mutex, NULL)) != 0) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing work queue mutex", md->mdhim_rank); - return MDHIM_ERROR; - } - - //Initialize out req mutex - md->mdhim_rs->out_req_mutex = malloc(sizeof(pthread_mutex_t)); - if (!md->mdhim_rs->out_req_mutex) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for range server", - md->mdhim_rank); - return MDHIM_ERROR; - } - if ((ret = pthread_mutex_init(md->mdhim_rs->out_req_mutex, NULL)) != 0) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing out req mutex", md->mdhim_rank); - return MDHIM_ERROR; - } - - //Initialize the condition variables - md->mdhim_rs->work_ready_cv = malloc(sizeof(pthread_cond_t)); - if (!md->mdhim_rs->work_ready_cv) 
{ - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while allocating memory for range server", - md->mdhim_rank); - return MDHIM_ERROR; - } - if ((ret = pthread_cond_init(md->mdhim_rs->work_ready_cv, NULL)) != 0) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing condition variable", - md->mdhim_rank); - return MDHIM_ERROR; - } - - //Initialize worker threads - md->mdhim_rs->workers = malloc(sizeof(pthread_t *) * md->db_opts->num_wthreads); - for (i = 0; i < md->db_opts->num_wthreads; i++) { - md->mdhim_rs->workers[i] = malloc(sizeof(pthread_t)); - if ((ret = pthread_create(md->mdhim_rs->workers[i], NULL, - worker_thread, (void *) md)) != 0) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing worker thread", - md->mdhim_rank); - return MDHIM_ERROR; - } - } - - //Initialize listener threads - if ((ret = pthread_create(&md->mdhim_rs->listener, NULL, - listener_thread, (void *) md)) != 0) { - mlog(MDHIM_SERVER_CRIT, "MDHIM Rank: %d - " - "Error while initializing listener thread", - md->mdhim_rank); - return MDHIM_ERROR; - } - - return MDHIM_SUCCESS; -} diff --git a/meta/src/range_server.h b/meta/src/range_server.h deleted file mode 100644 index bfcbfeb8e..000000000 --- a/meta/src/range_server.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. 
Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#ifndef __RANGESRV_H -#define __RANGESRV_H - -#include -#include -#include "data_store.h" -#include "messages.h" -#include "indexes.h" - -struct mdhim_t; - -typedef struct work_item work_item; -struct work_item { - work_item *next; - work_item *prev; - void *message; - int source; -}; - -typedef struct work_queue_t { - work_item *head; - work_item *tail; -} work_queue_t; - -/* Outstanding requests (i.e., MPI_Req) that need to be freed later */ -typedef struct out_req out_req; -struct out_req { - out_req *next; - out_req *prev; - void *req; - MPI_Request *message; -}; - -/* Range server specific data */ -typedef struct mdhim_rs_t { - work_queue_t *work_queue; - pthread_mutex_t *work_queue_mutex; - pthread_cond_t *work_ready_cv; - pthread_t listener; - pthread_t **workers; - struct index *indexes; /* A linked list of remote indexes that is served - (partially for fully) by this range server */ - //Records seconds spent on putting records - long double put_time; - //Records seconds spend on getting records - long double get_time; - long num_put; - long num_get; - out_req *out_req_list; - pthread_mutex_t *out_req_mutex; -} mdhim_rs_t; - -int range_server_add_work(struct mdhim_t *md, work_item *item); -int range_server_init(struct mdhim_t *md); -int range_server_init_comm(struct mdhim_t *md); -int range_server_stop(struct mdhim_t *md); -int range_server_add_oreq(struct mdhim_t *md, MPI_Request *req, void *msg); //Add an outstanding request -int 
range_server_clean_oreqs(struct mdhim_t *md); //Clean outstanding reqs -int unifyfs_compare(const char *a, const char *b); - -#endif diff --git a/meta/src/uthash/LICENSE b/meta/src/uthash/LICENSE deleted file mode 100644 index ad8e16a09..000000000 --- a/meta/src/uthash/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2005-2013, Troy D. Hanson http://troydhanson.github.com/uthash/ -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/meta/src/uthash/README.md b/meta/src/uthash/README.md deleted file mode 100644 index 3adebca4c..000000000 --- a/meta/src/uthash/README.md +++ /dev/null @@ -1,6 +0,0 @@ - -Documentation for uthash is available at: - -http://troydhanson.github.com/uthash/ - - diff --git a/meta/src/uthash/uthash.h b/meta/src/uthash/uthash.h deleted file mode 100644 index 0ae997ebf..000000000 --- a/meta/src/uthash/uthash.h +++ /dev/null @@ -1,947 +0,0 @@ -/* -Copyright (c) 2003-2013, Troy D. 
Hanson http://troydhanson.github.com/uthash/ -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef UTHASH_H -#define UTHASH_H - -#include /* memcmp,strlen */ -#include /* ptrdiff_t */ -#include /* exit() */ -#include /* int64_t */ - -/* These macros use decltype or the earlier __typeof GNU extension. - As decltype is only available in newer compilers (VS2010 or gcc 4.3+ - when compiling c++ source) this code uses whatever method is needed - or, for VS2008 where neither is available, uses casting workarounds. 
*/ -#ifdef _MSC_VER /* MS compiler */ -#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ -#define DECLTYPE(x) (decltype(x)) -#else /* VS2008 or older (or VS2010 in C mode) */ -#define NO_DECLTYPE -#define DECLTYPE(x) -#endif -#else /* GNU, Sun and other compilers */ -#define DECLTYPE(x) (__typeof(x)) -#endif - -#ifdef NO_DECLTYPE -#define DECLTYPE_ASSIGN(dst,src) \ -do { \ - char **_da_dst = (char**)(&(dst)); \ - *_da_dst = (char*)(src); \ -} while(0) -#else -#define DECLTYPE_ASSIGN(dst,src) \ -do { \ - (dst) = DECLTYPE(dst)(src); \ -} while(0) -#endif - -/* a number of the hash function use uint32_t which isn't defined on win32 */ -#ifdef _MSC_VER -typedef unsigned int uint32_t; -typedef unsigned char uint8_t; -#else -#include /* uint32_t */ -#endif - -#define UTHASH_VERSION 1.9.8 - -#ifndef uthash_fatal -#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ -#endif -#ifndef uthash_malloc -#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ -#endif -#ifndef uthash_free -#define uthash_free(ptr,sz) free(ptr) /* free fcn */ -#endif - -#ifndef uthash_noexpand_fyi -#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ -#endif -#ifndef uthash_expand_fyi -#define uthash_expand_fyi(tbl) /* can be defined to log expands */ -#endif - -/* initial number of buckets */ -#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */ -#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */ -#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */ - -/* calculate the element whose hash handle address is hhe */ -#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) - -#define HASH_FIND(hh,head,keyptr,keylen,out) \ -do { \ - unsigned _hf_bkt,_hf_hashv; \ - out=NULL; \ - if (head) { \ - HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ - if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ - HASH_FIND_IN_BKT((head)->hh.tbl, hh, 
(head)->hh.tbl->buckets[ _hf_bkt ], \ - keyptr,keylen,out); \ - } \ - } \ -} while (0) - -#ifdef HASH_BLOOM -#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) -#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) -#define HASH_BLOOM_MAKE(tbl) \ -do { \ - (tbl)->bloom_nbits = HASH_BLOOM; \ - (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ - if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ - memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ - (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ -} while (0) - -#define HASH_BLOOM_FREE(tbl) \ -do { \ - uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ -} while (0) - -#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) -#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) - -#define HASH_BLOOM_ADD(tbl,hashv) \ - HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) - -#define HASH_BLOOM_TEST(tbl,hashv) \ - HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) - -#else -#define HASH_BLOOM_MAKE(tbl) -#define HASH_BLOOM_FREE(tbl) -#define HASH_BLOOM_ADD(tbl,hashv) -#define HASH_BLOOM_TEST(tbl,hashv) (1) -#define HASH_BLOOM_BYTELEN 0 -#endif - -#define HASH_MAKE_TABLE(hh,head) \ -do { \ - (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ - sizeof(UT_hash_table)); \ - if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ - memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ - (head)->hh.tbl->tail = &((head)->hh); \ - (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ - (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ - (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ - (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ - HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ - if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ - memset((head)->hh.tbl->buckets, 0, \ - HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ - HASH_BLOOM_MAKE((head)->hh.tbl); \ - (head)->hh.tbl->signature = HASH_SIGNATURE; \ -} while(0) - -#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ - HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) - -#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ -do { \ - replaced=NULL; \ - HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ - if (replaced!=NULL) { \ - HASH_DELETE(hh,head,replaced); \ - }; \ - HASH_ADD(hh,head,fieldname,keylen_in,add); \ -} while(0) - -#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ -do { \ - unsigned _ha_bkt; \ - (add)->hh.next = NULL; \ - (add)->hh.key = (char*)keyptr; \ - (add)->hh.keylen = (unsigned)keylen_in; \ - if (!(head)) { \ - head = (add); \ - (head)->hh.prev = NULL; \ - HASH_MAKE_TABLE(hh,head); \ - } else { \ - (head)->hh.tbl->tail->next = (add); \ - (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ - (head)->hh.tbl->tail = &((add)->hh); \ - } \ - (head)->hh.tbl->num_items++; \ - (add)->hh.tbl = (head)->hh.tbl; \ - HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ - (add)->hh.hashv, _ha_bkt); \ - HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ - HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ - HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ - HASH_FSCK(hh,head); \ -} while(0) - -#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ -do { \ - bkt = ((hashv) & ((num_bkts) - 1)); \ -} while(0) - -/* delete "delptr" from the hash table. - * "the usual" patch-up process for the app-order doubly-linked-list. - * The use of _hd_hh_del below deserves special explanation. 
- * These used to be expressed using (delptr) but that led to a bug - * if someone used the same symbol for the head and deletee, like - * HASH_DELETE(hh,users,users); - * We want that to work, but by changing the head (users) below - * we were forfeiting our ability to further refer to the deletee (users) - * in the patch-up process. Solution: use scratch space to - * copy the deletee pointer, then the latter references are via that - * scratch pointer rather than through the repointed (users) symbol. - */ -#define HASH_DELETE(hh,head,delptr) \ -do { \ - unsigned _hd_bkt; \ - struct UT_hash_handle *_hd_hh_del; \ - if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ - uthash_free((head)->hh.tbl->buckets, \ - (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ - HASH_BLOOM_FREE((head)->hh.tbl); \ - uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ - head = NULL; \ - } else { \ - _hd_hh_del = &((delptr)->hh); \ - if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ - (head)->hh.tbl->tail = \ - (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ - (head)->hh.tbl->hho); \ - } \ - if ((delptr)->hh.prev) { \ - ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ - (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ - } else { \ - DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ - } \ - if (_hd_hh_del->next) { \ - ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ - (head)->hh.tbl->hho))->prev = \ - _hd_hh_del->prev; \ - } \ - HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ - HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ - (head)->hh.tbl->num_items--; \ - } \ - HASH_FSCK(hh,head); \ -} while (0) - - -/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ -#define HASH_FIND_STR(head,findstr,out) \ - HASH_FIND(hh,head,findstr,strlen(findstr),out) -#define HASH_ADD_STR(head,strfield,add) \ - HASH_ADD(hh,head,strfield,strlen(add->strfield),add) -#define 
HASH_REPLACE_STR(head,strfield,add,replaced) \ - HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced) -#define HASH_FIND_INT(head,findint,out) \ - HASH_FIND(hh,head,findint,sizeof(int),out) -#define HASH_ADD_INT(head,intfield,add) \ - HASH_ADD(hh,head,intfield,sizeof(int),add) -#define HASH_REPLACE_INT(head,intfield,add,replaced) \ - HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) -#define HASH_FIND_ULINT(head,findint,out) \ - HASH_FIND(hh,head,findint,sizeof(uint64_t),out) -#define HASH_ADD_ULINT(head,intfield,add) \ - HASH_ADD(hh,head,intfield,sizeof(uint64_t),add) -#define HASH_REPLACE_ULINT(head,intfield,add,replaced) \ - HASH_REPLACE(hh,head,intfield,sizeof(uint64_t),add,replaced) -#define HASH_FIND_PTR(head,findptr,out) \ - HASH_FIND(hh,head,findptr,sizeof(void *),out) -#define HASH_ADD_PTR(head,ptrfield,add) \ - HASH_ADD(hh,head,ptrfield,sizeof(void *),add) -#define HASH_REPLACE_PTR(head,ptrfield,add) \ - HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) -#define HASH_DEL(head,delptr) \ - HASH_DELETE(hh,head,delptr) - -/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. - * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. - */ -#ifdef HASH_DEBUG -#define HASH_OOPS(...) 
do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) -#define HASH_FSCK(hh,head) \ -do { \ - unsigned _bkt_i; \ - unsigned _count, _bkt_count; \ - char *_prev; \ - struct UT_hash_handle *_thh; \ - if (head) { \ - _count = 0; \ - for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ - _bkt_count = 0; \ - _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ - _prev = NULL; \ - while (_thh) { \ - if (_prev != (char*)(_thh->hh_prev)) { \ - HASH_OOPS("invalid hh_prev %p, actual %p\n", \ - _thh->hh_prev, _prev ); \ - } \ - _bkt_count++; \ - _prev = (char*)(_thh); \ - _thh = _thh->hh_next; \ - } \ - _count += _bkt_count; \ - if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ - HASH_OOPS("invalid bucket count %d, actual %d\n", \ - (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ - } \ - } \ - if (_count != (head)->hh.tbl->num_items) { \ - HASH_OOPS("invalid hh item count %d, actual %d\n", \ - (head)->hh.tbl->num_items, _count ); \ - } \ - /* traverse hh in app order; check next/prev integrity, count */ \ - _count = 0; \ - _prev = NULL; \ - _thh = &(head)->hh; \ - while (_thh) { \ - _count++; \ - if (_prev !=(char*)(_thh->prev)) { \ - HASH_OOPS("invalid prev %p, actual %p\n", \ - _thh->prev, _prev ); \ - } \ - _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ - _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ - (head)->hh.tbl->hho) : NULL ); \ - } \ - if (_count != (head)->hh.tbl->num_items) { \ - HASH_OOPS("invalid app item count %d, actual %d\n", \ - (head)->hh.tbl->num_items, _count ); \ - } \ - } \ -} while (0) -#else -#define HASH_FSCK(hh,head) -#endif - -/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to - * the descriptor to which this macro is defined for tuning the hash function. - * The app can #include to get the prototype for write(2). 
*/ -#ifdef HASH_EMIT_KEYS -#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ -do { \ - unsigned _klen = fieldlen; \ - write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ - write(HASH_EMIT_KEYS, keyptr, fieldlen); \ -} while (0) -#else -#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) -#endif - -/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ -#ifdef HASH_FUNCTION -#define HASH_FCN HASH_FUNCTION -#else -#define HASH_FCN HASH_JEN -#endif - -/* The Bernstein hash function, used in Perl prior to v5.6 */ -#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned _hb_keylen=keylen; \ - char *_hb_key=(char*)(key); \ - (hashv) = 0; \ - while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \ - bkt = (hashv) & (num_bkts-1); \ -} while (0) - - -/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at - * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ -#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned _sx_i; \ - char *_hs_key=(char*)(key); \ - hashv = 0; \ - for(_sx_i=0; _sx_i < keylen; _sx_i++) \ - hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ - bkt = hashv & (num_bkts-1); \ -} while (0) - -#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned _fn_i; \ - char *_hf_key=(char*)(key); \ - hashv = 2166136261UL; \ - for(_fn_i=0; _fn_i < keylen; _fn_i++) \ - hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \ - bkt = hashv & (num_bkts-1); \ -} while(0) - -#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned _ho_i; \ - char *_ho_key=(char*)(key); \ - hashv = 0; \ - for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ - hashv += _ho_key[_ho_i]; \ - hashv += (hashv << 10); \ - hashv ^= (hashv >> 6); \ - } \ - hashv += (hashv << 3); \ - hashv ^= (hashv >> 11); \ - hashv += (hashv << 15); \ - bkt = hashv & (num_bkts-1); \ -} while(0) - -#define HASH_JEN_MIX(a,b,c) \ -do { \ - a -= b; a -= c; a ^= ( c >> 13 ); \ - b -= c; b -= a; b ^= ( a << 8 ); \ - c -= a; c 
-= b; c ^= ( b >> 13 ); \ - a -= b; a -= c; a ^= ( c >> 12 ); \ - b -= c; b -= a; b ^= ( a << 16 ); \ - c -= a; c -= b; c ^= ( b >> 5 ); \ - a -= b; a -= c; a ^= ( c >> 3 ); \ - b -= c; b -= a; b ^= ( a << 10 ); \ - c -= a; c -= b; c ^= ( b >> 15 ); \ -} while (0) - -#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned _hj_i,_hj_j,_hj_k; \ - unsigned char *_hj_key=(unsigned char*)(key); \ - hashv = 0xfeedbeef; \ - _hj_i = _hj_j = 0x9e3779b9; \ - _hj_k = (unsigned)keylen; \ - while (_hj_k >= 12) { \ - _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ - + ( (unsigned)_hj_key[2] << 16 ) \ - + ( (unsigned)_hj_key[3] << 24 ) ); \ - _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ - + ( (unsigned)_hj_key[6] << 16 ) \ - + ( (unsigned)_hj_key[7] << 24 ) ); \ - hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ - + ( (unsigned)_hj_key[10] << 16 ) \ - + ( (unsigned)_hj_key[11] << 24 ) ); \ - \ - HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ - \ - _hj_key += 12; \ - _hj_k -= 12; \ - } \ - hashv += keylen; \ - switch ( _hj_k ) { \ - case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ - case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ - case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ - case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ - case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ - case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ - case 5: _hj_j += _hj_key[4]; \ - case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ - case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ - case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ - case 1: _hj_i += _hj_key[0]; \ - } \ - HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ - bkt = hashv & (num_bkts-1); \ -} while(0) - -/* The Paul Hsieh hash function */ -#undef get16bits -#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ - || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) -#define get16bits(d) (*((const uint16_t *) (d))) -#endif - -#if !defined (get16bits) -#define get16bits(d) 
((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ - +(uint32_t)(((const uint8_t *)(d))[0]) ) -#endif -#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ -do { \ - unsigned char *_sfh_key=(unsigned char*)(key); \ - uint32_t _sfh_tmp, _sfh_len = keylen; \ - \ - int _sfh_rem = _sfh_len & 3; \ - _sfh_len >>= 2; \ - hashv = 0xcafebabe; \ - \ - /* Main loop */ \ - for (;_sfh_len > 0; _sfh_len--) { \ - hashv += get16bits (_sfh_key); \ - _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \ - hashv = (hashv << 16) ^ _sfh_tmp; \ - _sfh_key += 2*sizeof (uint16_t); \ - hashv += hashv >> 11; \ - } \ - \ - /* Handle end cases */ \ - switch (_sfh_rem) { \ - case 3: hashv += get16bits (_sfh_key); \ - hashv ^= hashv << 16; \ - hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \ - hashv += hashv >> 11; \ - break; \ - case 2: hashv += get16bits (_sfh_key); \ - hashv ^= hashv << 11; \ - hashv += hashv >> 17; \ - break; \ - case 1: hashv += *_sfh_key; \ - hashv ^= hashv << 10; \ - hashv += hashv >> 1; \ - } \ - \ - /* Force "avalanching" of final 127 bits */ \ - hashv ^= hashv << 3; \ - hashv += hashv >> 5; \ - hashv ^= hashv << 4; \ - hashv += hashv >> 17; \ - hashv ^= hashv << 25; \ - hashv += hashv >> 6; \ - bkt = hashv & (num_bkts-1); \ -} while(0) - -#ifdef HASH_USING_NO_STRICT_ALIASING -/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. - * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. - * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
- * - * Note the preprocessor built-in defines can be emitted using: - * - * gcc -m64 -dM -E - < /dev/null (on gcc) - * cc -## a.c (where a.c is a simple test file) (Sun Studio) - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) -#define MUR_GETBLOCK(p,i) p[i] -#else /* non intel */ -#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) -#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) -#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) -#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) -#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) -#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) -#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) -#else /* assume little endian non-intel */ -#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) -#endif -#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ - (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ - (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ - MUR_ONE_THREE(p)))) -#endif -#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -#define MUR_FMIX(_h) \ -do { \ - _h ^= _h >> 16; \ - _h *= 0x85ebca6b; \ - _h ^= _h >> 13; \ - _h *= 0xc2b2ae35l; \ - _h ^= _h >> 16; \ -} while(0) - -#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ -do { \ - const uint8_t *_mur_data = (const uint8_t*)(key); \ - const int _mur_nblocks = (keylen) / 4; \ - uint32_t _mur_h1 = 0xf88D5353; \ - uint32_t _mur_c1 = 0xcc9e2d51; \ - uint32_t _mur_c2 = 0x1b873593; \ - uint32_t _mur_k1 = 0; \ - const uint8_t *_mur_tail; \ - const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ - int _mur_i; \ - for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ - _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - \ - _mur_h1 ^= _mur_k1; \ - _mur_h1 = MUR_ROTL32(_mur_h1,13); \ - _mur_h1 = _mur_h1*5+0xe6546b64; \ - } \ - _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ - _mur_k1=0; \ - switch((keylen) & 3) { \ - case 3: _mur_k1 ^= _mur_tail[2] << 16; \ - case 2: _mur_k1 ^= _mur_tail[1] << 8; \ - case 1: _mur_k1 ^= _mur_tail[0]; \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - _mur_h1 ^= _mur_k1; \ - } \ - _mur_h1 ^= (keylen); \ - MUR_FMIX(_mur_h1); \ - hashv = _mur_h1; \ - bkt = hashv & (num_bkts-1); \ -} while(0) -#endif /* HASH_USING_NO_STRICT_ALIASING */ - -/* key comparison function; return 0 if keys equal */ -#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) - -/* iterate over items in a known bucket to find desired item */ -#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ -do { \ - if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ - else out=NULL; \ - while (out) { \ - if ((out)->hh.keylen == keylen_in) { \ - if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \ - } \ - if ((out)->hh.hh_next) 
DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \ - else out = NULL; \ - } \ -} while(0) - -/* add an item to a bucket */ -#define HASH_ADD_TO_BKT(head,addhh) \ -do { \ - head.count++; \ - (addhh)->hh_next = head.hh_head; \ - (addhh)->hh_prev = NULL; \ - if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ - (head).hh_head=addhh; \ - if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ - && (addhh)->tbl->noexpand != 1) { \ - HASH_EXPAND_BUCKETS((addhh)->tbl); \ - } \ -} while(0) - -/* remove an item from a given bucket */ -#define HASH_DEL_IN_BKT(hh,head,hh_del) \ - (head).count--; \ - if ((head).hh_head == hh_del) { \ - (head).hh_head = hh_del->hh_next; \ - } \ - if (hh_del->hh_prev) { \ - hh_del->hh_prev->hh_next = hh_del->hh_next; \ - } \ - if (hh_del->hh_next) { \ - hh_del->hh_next->hh_prev = hh_del->hh_prev; \ - } - -/* Bucket expansion has the effect of doubling the number of buckets - * and redistributing the items into the new buckets. Ideally the - * items will distribute more or less evenly into the new buckets - * (the extent to which this is true is a measure of the quality of - * the hash function as it applies to the key domain). - * - * With the items distributed into more buckets, the chain length - * (item count) in each bucket is reduced. Thus by expanding buckets - * the hash keeps a bound on the chain length. This bounded chain - * length is the essence of how a hash provides constant time lookup. - * - * The calculation of tbl->ideal_chain_maxlen below deserves some - * explanation. First, keep in mind that we're calculating the ideal - * maximum chain length based on the *new* (doubled) bucket count. - * In fractions this is just n/b (n=number of items,b=new num buckets). - * Since the ideal chain length is an integer, we want to calculate - * ceil(n/b). 
We don't depend on floating point arithmetic in this - * hash, so to calculate ceil(n/b) with integers we could write - * - * ceil(n/b) = (n/b) + ((n%b)?1:0) - * - * and in fact a previous version of this hash did just that. - * But now we have improved things a bit by recognizing that b is - * always a power of two. We keep its base 2 log handy (call it lb), - * so now we can write this with a bit shift and logical AND: - * - * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) - * - */ -#define HASH_EXPAND_BUCKETS(tbl) \ -do { \ - unsigned _he_bkt; \ - unsigned _he_bkt_i; \ - struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ - UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ - _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ - 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ - if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ - memset(_he_new_buckets, 0, \ - 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ - tbl->ideal_chain_maxlen = \ - (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ - ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 
1 : 0); \ - tbl->nonideal_items = 0; \ - for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ - { \ - _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ - while (_he_thh) { \ - _he_hh_nxt = _he_thh->hh_next; \ - HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ - _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ - if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ - tbl->nonideal_items++; \ - _he_newbkt->expand_mult = _he_newbkt->count / \ - tbl->ideal_chain_maxlen; \ - } \ - _he_thh->hh_prev = NULL; \ - _he_thh->hh_next = _he_newbkt->hh_head; \ - if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ - _he_thh; \ - _he_newbkt->hh_head = _he_thh; \ - _he_thh = _he_hh_nxt; \ - } \ - } \ - uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ - tbl->num_buckets *= 2; \ - tbl->log2_num_buckets++; \ - tbl->buckets = _he_new_buckets; \ - tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ - (tbl->ineff_expands+1) : 0; \ - if (tbl->ineff_expands > 1) { \ - tbl->noexpand=1; \ - uthash_noexpand_fyi(tbl); \ - } \ - uthash_expand_fyi(tbl); \ -} while(0) - - -/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ -/* Note that HASH_SORT assumes the hash handle name to be hh. - * HASH_SRT was added to allow the hash handle name to be passed in. */ -#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) -#define HASH_SRT(hh,head,cmpfcn) \ -do { \ - unsigned _hs_i; \ - unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ - struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ - if (head) { \ - _hs_insize = 1; \ - _hs_looping = 1; \ - _hs_list = &((head)->hh); \ - while (_hs_looping) { \ - _hs_p = _hs_list; \ - _hs_list = NULL; \ - _hs_tail = NULL; \ - _hs_nmerges = 0; \ - while (_hs_p) { \ - _hs_nmerges++; \ - _hs_q = _hs_p; \ - _hs_psize = 0; \ - for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ - _hs_psize++; \ - _hs_q = (UT_hash_handle*)((_hs_q->next) ? 
\ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - if (! (_hs_q) ) break; \ - } \ - _hs_qsize = _hs_insize; \ - while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ - if (_hs_psize == 0) { \ - _hs_e = _hs_q; \ - _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_qsize--; \ - } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ - _hs_e = _hs_p; \ - _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ - ((void*)((char*)(_hs_p->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_psize--; \ - } else if (( \ - cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ - DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ - ) <= 0) { \ - _hs_e = _hs_p; \ - _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ - ((void*)((char*)(_hs_p->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_psize--; \ - } else { \ - _hs_e = _hs_q; \ - _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_qsize--; \ - } \ - if ( _hs_tail ) { \ - _hs_tail->next = ((_hs_e) ? \ - ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ - } else { \ - _hs_list = _hs_e; \ - } \ - _hs_e->prev = ((_hs_tail) ? \ - ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ - _hs_tail = _hs_e; \ - } \ - _hs_p = _hs_q; \ - } \ - _hs_tail->next = NULL; \ - if ( _hs_nmerges <= 1 ) { \ - _hs_looping=0; \ - (head)->hh.tbl->tail = _hs_tail; \ - DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ - } \ - _hs_insize *= 2; \ - } \ - HASH_FSCK(hh,head); \ - } \ -} while (0) - -/* This function selects items from one hash into another hash. - * The end result is that the selected items have dual presence - * in both hashes. There is no copy of the items made; rather - * they are added into the new hash through a secondary hash - * hash handle that must be present in the structure. 
*/ -#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ -do { \ - unsigned _src_bkt, _dst_bkt; \ - void *_last_elt=NULL, *_elt; \ - UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ - ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ - if (src) { \ - for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ - for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ - _src_hh; \ - _src_hh = _src_hh->hh_next) { \ - _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ - if (cond(_elt)) { \ - _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ - _dst_hh->key = _src_hh->key; \ - _dst_hh->keylen = _src_hh->keylen; \ - _dst_hh->hashv = _src_hh->hashv; \ - _dst_hh->prev = _last_elt; \ - _dst_hh->next = NULL; \ - if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ - if (!dst) { \ - DECLTYPE_ASSIGN(dst,_elt); \ - HASH_MAKE_TABLE(hh_dst,dst); \ - } else { \ - _dst_hh->tbl = (dst)->hh_dst.tbl; \ - } \ - HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ - HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ - (dst)->hh_dst.tbl->num_items++; \ - _last_elt = _elt; \ - _last_elt_hh = _dst_hh; \ - } \ - } \ - } \ - } \ - HASH_FSCK(hh_dst,dst); \ -} while (0) - -#define HASH_CLEAR(hh,head) \ -do { \ - if (head) { \ - uthash_free((head)->hh.tbl->buckets, \ - (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ - HASH_BLOOM_FREE((head)->hh.tbl); \ - uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ - (head)=NULL; \ - } \ -} while(0) - -#define HASH_OVERHEAD(hh,head) \ - (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ - ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ - (sizeof(UT_hash_table)) + \ - (HASH_BLOOM_BYTELEN))) - -#ifdef NO_DECLTYPE -#define HASH_ITER(hh,head,el,tmp) \ -for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ - el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) -#else -#define HASH_ITER(hh,head,el,tmp) \ 
-for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ - el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) -#endif - -/* obtain a count of items in the hash */ -#define HASH_COUNT(head) HASH_CNT(hh,head) -#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) - -typedef struct UT_hash_bucket { - struct UT_hash_handle *hh_head; - unsigned count; - - /* expand_mult is normally set to 0. In this situation, the max chain length - * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If - * the bucket's chain exceeds this length, bucket expansion is triggered). - * However, setting expand_mult to a non-zero value delays bucket expansion - * (that would be triggered by additions to this particular bucket) - * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. - * (The multiplier is simply expand_mult+1). The whole idea of this - * multiplier is to reduce bucket expansions, since they are expensive, in - * situations where we know that a particular bucket tends to be overused. - * It is better to let its chain length grow to a longer yet-still-bounded - * value, than to do an O(n) bucket expansion too often. - */ - unsigned expand_mult; - -} UT_hash_bucket; - -/* random signature used only to find hash tables in external analysis */ -#define HASH_SIGNATURE 0xa0111fe1 -#define HASH_BLOOM_SIGNATURE 0xb12220f2 - -typedef struct UT_hash_table { - UT_hash_bucket *buckets; - unsigned num_buckets, log2_num_buckets; - unsigned num_items; - struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ - ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ - - /* in an ideal situation (all buckets used equally), no bucket would have - * more than ceil(#items/#buckets) items. that's the ideal chain length. */ - unsigned ideal_chain_maxlen; - - /* nonideal_items is the number of items in the hash whose chain position - * exceeds the ideal chain maxlen. 
these items pay the penalty for an uneven - * hash distribution; reaching them in a chain traversal takes >ideal steps */ - unsigned nonideal_items; - - /* ineffective expands occur when a bucket doubling was performed, but - * afterward, more than half the items in the hash had nonideal chain - * positions. If this happens on two consecutive expansions we inhibit any - * further expansion, as it's not helping; this happens when the hash - * function isn't a good fit for the key domain. When expansion is inhibited - * the hash will still work, albeit no longer in constant time. */ - unsigned ineff_expands, noexpand; - - uint32_t signature; /* used only to find hash tables in external analysis */ -#ifdef HASH_BLOOM - uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ - uint8_t *bloom_bv; - char bloom_nbits; -#endif - -} UT_hash_table; - -typedef struct UT_hash_handle { - struct UT_hash_table *tbl; - void *prev; /* prev element in app order */ - void *next; /* next element in app order */ - struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ - struct UT_hash_handle *hh_next; /* next hh in bucket order */ - void *key; /* ptr to enclosing struct's key */ - unsigned keylen; /* enclosing struct's key len */ - unsigned hashv; /* result of hash-fcn(key) */ -} UT_hash_handle; - -#endif /* UTHASH_H */ diff --git a/meta/tests/single_tests/Makefile b/meta/tests/single_tests/Makefile deleted file mode 100644 index e9f1fb25c..000000000 --- a/meta/tests/single_tests/Makefile +++ /dev/null @@ -1,93 +0,0 @@ -include ../../Makefile.cfg -CLIBS += -L../../src -CINC += -I../../src -I../../src/uthash -all: put-get bput-bget put-del bput-bdel put-getn \ - put-getp puts-gets bput-bgetn bput-bgetp \ - bput-bget_secondary put-get_secondary \ - put-get_secondary_local bput-bget_secondary_local \ - put-getn_secondary put-getn_secondary_local \ - put-del_secondary put-getp_secondary put-get_2secondary_local \ - put-del_secondary_local plfs-put-get 
index_name range_test \ - range_bget - -put-get: put-get.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -plfs-put-get: plfs-put-get.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-get_secondary: put-get_secondary.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-get_secondary_local: put-get_secondary_local.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-get_2secondary_local: put-get_2secondary_local.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -puts-gets: puts-gets.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-getn: put-getn.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-getn_secondary: put-getn_secondary.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-getn_secondary_local: put-getn_secondary_local.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-getp_secondary: put-getp_secondary.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - - -put-getp: put-getp.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-del: put-del.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-del_secondary: put-del_secondary.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -put-del_secondary_local: put-del_secondary_local.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bget: bput-bget.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bget_secondary: bput-bget_secondary.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bget_secondary_local: bput-bget_secondary_local.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bgetn: bput-bgetn.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bgetp: bput-bgetp.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -range_test: range_test.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -range_bget: range_bget.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -bput-bdel: bput-bdel.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -index_name: index_name.c - $(CC) $< $(CINC) $(CLIBS) $(CFLAGS) -o $@ - -clean: - rm -rf put-get bput-bget put-del bput-bdel\ - put-getn put-getp range_test\ - puts-gets bput-bgetn 
\ - bput-bgetp bput-bget_secondary \ - put-get_secondary put-get_secondary_local \ - bput-bget_secondary_local put-getn_secondary_local \ - put-getn_secondary put-del_secondary put-getp_secondary \ - put-get_2secondary_local put-del_secondary_local plfs-put-get index_name \ - range_test range_bget - diff --git a/meta/tests/single_tests/bput-bdel.c b/meta/tests/single_tests/bput-bdel.c deleted file mode 100644 index 056af2728..000000000 --- a/meta/tests/single_tests/bput-bdel.c +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 50 -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - int **keys; - int key_lens[KEYS]; - int **values; - int value_lens[KEYS]; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if the MPI doesn't support multiple thread mode - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - keys = malloc(sizeof(int *) * KEYS); - values = malloc(sizeof(int 
*) * KEYS); - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(int)); - *keys[i] = (i + 1) * (md->mdhim_rank + 1); - printf("Rank: %d - Inserting key: %d\n", md->mdhim_rank, *keys[i]); - key_lens[i] = sizeof(int); - values[i] = malloc(sizeof(int)); - *values[i] = (i + 1) * (md->mdhim_rank + 1); - value_lens[i] = sizeof(int); - } - - //Insert the records - brm = mdhimBPut(md, (void **) keys, key_lens, - (void **) values, value_lens, KEYS, - NULL, NULL); - brmp = brm; - if (!brm || brm->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - //Delete the records - brm = mdhimBDelete(md, md->primary_index, - (void **) keys, key_lens, - KEYS); - brmp = brm; - if (!brm || brm->error) { - printf("Rank - %d: Error deleting keys/values from MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error deleting keys\n", md->mdhim_rank); - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - //Try to get the records back - should fail - bgrm = mdhimBGet(md, md->primary_index, - (void **) keys, key_lens, - KEYS, MDHIM_GET_EQ); - bgrmp = bgrm; - while (bgrmp) { - if (bgrmp->error < 0) { - printf("Rank: %d - Error retrieving values\n", md->mdhim_rank); - } - - for (i = 0; i < bgrmp->num_keys && bgrmp->error >= 0; i++) { - - printf("Rank: %d - Got key: %d value: %d\n", md->mdhim_rank, - *(int *)bgrmp->keys[i], *(int *)bgrmp->values[i]); - } - - bgrmp = bgrmp->next; - //Free the message - mdhim_full_release_msg(bgrm); - bgrm = bgrmp; - } - - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - for (i = 0; i < KEYS; i++) { - free(keys[i]); - 
free(values[i]); - } - - free(keys); - free(values); - - return 0; -} diff --git a/meta/tests/single_tests/bput-bget.c b/meta/tests/single_tests/bput-bget.c deleted file mode 100644 index 2348306cf..000000000 --- a/meta/tests/single_tests/bput-bget.c +++ /dev/null @@ -1,369 +0,0 @@ -#include -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -//#define KEYS 100 -//#define TOTAL 100000000 -#define GEN_STR_LEN 1024 - -extern double memgettime, ssdgettime, get_other_time, find_table_time; -extern double imemgettime, maytime, get_result_time; -extern double synctime, myseektime, roomtime, \ -batchtime, key_put_time, addtime, \ -bgtime, bartime, nbstoretime, newnodetime,\ - nbnexttime, findtime, judgetime, \ -heighttime, allfindtime, recordtime, \ -inserttime, waittime, block_reader_time, \ -seek_result_time, get_match_time, get_decode_time; -extern double valassigntime, keyptrtime, get_internal_time, find_tale_time, seek_iter_time; - -extern long nr_mem_get, nr_ssd_get, nr_hit, nr_miss; -extern double mem_get_time, ssd_get_time, cache_get_time, \ - read_compact_time, read_cache_time, read_block_time, \ - seek_block_time, mem_put_time, writeahead_time, \ - write_compact_time, sstable_time, tablecache_time; - - -typedef struct { - unsigned long fid; - unsigned long nodeid; - - unsigned long offset; - unsigned long addr; - unsigned long len; -}meta_t; - -typedef struct { - unsigned long fid; - unsigned long offset; -}ulfs_key_t; - -typedef struct { - unsigned long nodeid; - unsigned long len; - unsigned long addr; -}ulfs_val_t; - -extern double localgetcpytime; -extern double localrangetime; -extern double localbpmtime; -extern double localmalloctime; -extern double resp_get_comm_time; -extern double resp_put_comm_time; -extern double msgputtime; -extern double msggettime; -extern double dbgettime; -extern double dbbputtime; -extern double localcpytime; -extern double packputtime; -extern double packgettime; -extern double stat_time; -extern 
double packretgettime; -extern double packretputtime; -extern double packmpiputtime; -extern double localassigntime; - -int init_meta_lst(meta_t *meta_lst, ulfs_key_t **key_lst,\ - ulfs_val_t **value_lst, long segnum, long transz, int rank, \ - int size); - - -int main(int argc, char **argv) { - int ret; - int provided; - struct mdhim_t *md; - int **keys; - int **values; - int total = 0; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - struct timeval start_tv, end_tv, put_end; - - double get_time = 0, put_time = 0; - - int c, serratio, bulknum, size, path_len; - long transz, segnum, rangesz; - char db_path[GEN_STR_LEN] = {0}; - char db_name[GEN_STR_LEN] = {0}; - int dbug = MLOG_CRIT; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - MPI_Comm comm; - - static const char *opts = "c:t:s:r:n:p:d:"; - - while ((c = getopt(argc, argv, opts)) != -1) { - switch(c) { - case 'c': - bulknum = atoi(optarg); break; - case 's': - serratio = atoi(optarg); break; - case 't': - transz = atol(optarg); break; - case 'r': - rangesz = atol(optarg); break; - case 'n': - segnum = atol(optarg); break; - case 'p': - strcpy(db_path, optarg); break; - case 'd': - strcpy(db_name, optarg); break; - } - } - - int *key_lens; - int *val_lens; - // Create options for DB initialization - - db_opts = malloc(sizeof(struct mdhim_options_t)); - - db_opts->db_path = db_path; - db_opts->db_create_new = 1; - db_opts->db_value_append = MDHIM_DB_OVERWRITE; - - db_opts->rserver_factor = serratio; - db_opts->max_recs_per_slice = 1024 * 1024 *50; - db_opts->num_paths = 0; - db_opts->num_wthreads = 1; - - path_len = strlen(db_opts->db_path) \ - + strlen("manifest") + 1; - - char *manifest_path; - manifest_path = malloc(path_len); - sprintf(manifest_path, "%s/%s", db_opts->db_path, "manifest"); - db_opts->manifest_path = manifest_path; - db_opts->db_name = db_name; - db_opts->db_type = 
LEVELDB; - db_opts->db_key_type = MDHIM_UNIFYFS_KEY; - db_opts->debug_level = MLOG_CRIT; - db_opts->max_recs_per_slice = rangesz; - db_opts->rserver_factor = serratio; - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - meta_t *meta_lst = (meta_t *)malloc(segnum*sizeof(meta_t)); - ulfs_key_t **key_lst = \ - (ulfs_key_t **)malloc(segnum*sizeof(ulfs_key_t *)); - ulfs_val_t **val_lst = \ - (ulfs_val_t **)malloc(segnum * sizeof(ulfs_val_t *)); - - key_lens = (int *)malloc(segnum * sizeof(int)); - val_lens = (int *)malloc(segnum * sizeof(int)); - - long i; - for (i = 0; i < segnum; i++) { - key_lens[i] = sizeof(ulfs_key_t); - val_lens[i] = sizeof(ulfs_val_t); - } - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(md->mdhim_comm, &size); - MPI_Barrier(MPI_COMM_WORLD); - - init_meta_lst(meta_lst, key_lst, val_lst,\ - segnum, transz, md->mdhim_rank, size); - - - gettimeofday(&start_tv, NULL); -// printf("rank:%d, mdihm_rank is %d\n", rank, md->mdhim_rank); -// fflush(stdout); - while (total != segnum) { - //Insert the keys into MDHIM - brm = mdhimBPut(md, (void **) (&key_lst[total]), key_lens, - (void **) (&val_lst[total]), val_lens, bulknum, - NULL, NULL); - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into\ - MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brmp = brmp->next; - //Free 
the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - total += bulknum; - } - MPI_Barrier(MPI_COMM_WORLD); - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { -// printf("Committed MDHIM database\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - total = 0; - gettimeofday(&put_end, NULL); - put_time = 1000000*(put_end.tv_sec-start_tv.tv_sec) + put_end.tv_usec-start_tv.tv_usec; - while (total != segnum) { - //Get the values back for each key inserted - bgrm = mdhimBGet(md, md->primary_index,\ - &key_lst[total], key_lens, - bulknum, MDHIM_GET_EQ); - bgrmp = bgrm; - while (bgrmp) { - if (bgrmp->error < 0) { - printf("Rank: %d - Error retrieving values", md->mdhim_rank); - } - - for (i = 0; i < bgrmp->num_keys && bgrmp->error >= 0; i++) { - -// printf("Rank: %d - Got key: %d value: %d\n", md->mdhim_rank, -// *(int *)bgrmp->keys[i], *(int *)bgrmp->values[i]); -/* printf("Rank: %d - num_keys is %ld, offset:%ld\n", md->mdhim_rank, - bgrmp->num_keys, ((ulfs_key_t *)(bgrmp->keys[i]))->offset); - fflush(stdout); -*/ - } - - bgrmp = bgrmp->next; - //Free the message received - mdhim_full_release_msg(bgrm); - bgrm = bgrmp; - } - - total += bulknum; - } - MPI_Barrier(MPI_COMM_WORLD); - gettimeofday(&end_tv, NULL); - get_time = 1000000*(end_tv.tv_sec - put_end.tv_sec) + end_tv.tv_usec - put_end.tv_usec; - //Quit MDHIM - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - -if (md->mdhim_rank == 0) { -/* for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - } - - free(keys); - free(values); -*/ -} - MPI_Barrier(MPI_COMM_WORLD); - if(md->mdhim_rank == 0) { - printf("put time is %lf, get time is %lf, \ - network_put is %lf, network_get is %lf, \ - ,dbputtime:%lf, dbgettime:%lf, packputtime:%lf, \ - packgettime:%lf, localcpytime:%lf, \ - resp_put_comm_time:%lf, \ - resp_get_comm_time:%lf, \ - stat_time:%lf, 
packretputtime:%lf, \ - packretgettime:%lf, \ - localgetcpytime:%lf, \ - packmpiputtime:%lf, \ - localassigntime:%lf, \ - localmalloctime:%lf, \ - localbpmtime:%lf, \ - localrangetime:%lf\n", \ - put_time/1000000,\ - get_time/1000000, \ - msgputtime/1000000, msggettime/1000000,\ - dbbputtime/1000000, \ - dbgettime/1000000, packputtime/1000000, \ - packgettime/1000000, localcpytime/1000000, \ - resp_put_comm_time/1000000, \ - resp_get_comm_time/1000000, \ - stat_time/1000000, \ - packretputtime/1000000, \ - packretgettime/1000000, \ - localgetcpytime/1000000, \ - packmpiputtime/1000000, \ - localassigntime/1000000, \ - localmalloctime/1000000, \ - localbpmtime/1000000, \ - localrangetime/1000000); - - printf("mem_get_time:%lf, ssd_get_time:%lf, \ - read_compact_time:%lf, read_cache_time:%lf, \ - read_block_time:%lf, seek_block_time:%lf, hit:%ld, miss:%ld, nr_mem_get:%ld, nr_ssd_get:%ld, sstabletime:%lf, tablecache_time:%lf, cache_get_time:%lf, get_other_time:%lf, getresulttime:%lf, findtime:%lf, internaltime:%lf, decodetime:%lf, seekresulttime:%lf, matchtime:%lf, blockreadtime:%lf, seekitertime:%lf\n\n", \ - mem_get_time/1000000, ssd_get_time/1000000, \ - read_compact_time/1000000, read_cache_time/1000000, \ - read_block_time/1000000, seek_block_time/1000000, nr_hit, \ - nr_miss, nr_mem_get, nr_ssd_get, sstable_time/1000000, \ - tablecache_time/1000000, cache_get_time/1000000, \ - get_other_time/1000000, \ - get_result_time/1000000, find_table_time/1000000,\ - get_internal_time/1000000,\ - get_decode_time/1000000, \ - seek_result_time/1000000, \ - get_match_time/1000000, \ - block_reader_time/1000000, \ - seek_iter_time/1000000); - - printf("mem_put_time:%lf, writeahead_time:%lf, \ - write_compact_time:%lf\n", mem_put_time/1000000, \ - writeahead_time/1000000, write_compact_time/1000000); - - - fflush(stdout); -// printf("Took: %u seconds to insert and get %u keys/values\n", -// (unsigned int) (end_tv.tv_sec - start_tv.tv_sec), TOTAL); -} - MPI_Finalize(); - return 
0; -} - -int init_meta_lst(meta_t *meta_lst, ulfs_key_t **key_lst,\ - ulfs_val_t **value_lst, \ - long segnum, long transz, int rank, int size) { - - long i=0; - for (i = 0; i < segnum; i++) { - meta_lst[i].fid = 0; - -// meta_lst[i].offset = i * transz + rank * transz * segnum; - meta_lst[i].offset = i * transz * size + rank * transz; - meta_lst[i].nodeid = rank; - meta_lst[i].addr = i * transz; - meta_lst[i].len = transz; - fflush(stdout); - - key_lst[i] = (ulfs_key_t *)malloc(sizeof(ulfs_key_t)); - key_lst[i]->fid = meta_lst[i].fid; - key_lst[i]->offset = meta_lst[i].offset; - - value_lst[i] = (ulfs_val_t *)malloc(sizeof(ulfs_val_t)); - value_lst[i]->addr = meta_lst[i].addr; - value_lst[i]->len = meta_lst[i].len; - value_lst[i]->nodeid = meta_lst[i].nodeid; - } - return 0; -} diff --git a/meta/tests/single_tests/bput-bget_secondary.c b/meta/tests/single_tests/bput-bget_secondary.c deleted file mode 100644 index 4c20e28ba..000000000 --- a/meta/tests/single_tests/bput-bget_secondary.c +++ /dev/null @@ -1,255 +0,0 @@ -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 10000 -#define TOTAL_KEYS 10000 -#define SLICE_SIZE 100000 -#define SECONDARY_SLICE_SIZE 100000 -#define PRIMARY 1 -#define SECONDARY 2 - -uint64_t **keys; -int *key_lens; -uint64_t **values; -int *value_lens; -uint64_t ***secondary_keys; -int **secondary_key_lens; - -void start_record(struct timeval *start) { - gettimeofday(start, NULL); -} - -void end_record(struct timeval *end) { - gettimeofday(end, NULL); -} - -void add_time(struct timeval *start, struct timeval *end, long double *time) { - long double elapsed = (long double) (end->tv_sec - start->tv_sec) + - ((long double) (end->tv_usec - start->tv_usec)/1000000.0); - *time += elapsed; -} - -void gen_keys_values(int rank, int total_keys) { - int i = 0; - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(uint64_t)); - *keys[i] = i + (uint64_t) ((uint64_t) rank * (uint64_t)TOTAL_KEYS) + total_keys; - 
/* If we are generating keys for the secondary index, then they should be distributed differently - across the range servers */ - secondary_keys[i] = malloc(sizeof(uint64_t *)); - *secondary_keys[i] = malloc(sizeof(uint64_t)); - **secondary_keys[i] = i + rank; - key_lens[i] = sizeof(uint64_t); - secondary_key_lens[i] = malloc(sizeof(uint64_t)); - *secondary_key_lens[i] = sizeof(uint64_t); - values[i] = malloc(sizeof(uint64_t)); - value_lens[i] = sizeof(uint64_t); - *values[i] = rank; - } -} - -void free_key_values() { - int i; - - for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - free(*secondary_keys[i]); - free(secondary_keys[i]); - free(secondary_key_lens[i]); - } -} - -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - int total = 0; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - struct timeval start_tv, end_tv; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - long double put_time = 0; - long double get_time = 0; - struct index_t *secondary_index; - struct secondary_bulk_info *secondary_info; - int num_keys[KEYS]; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 4); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't 
initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - gettimeofday(&start_tv, NULL); - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - key_lens = malloc(sizeof(int) * KEYS); - value_lens = malloc(sizeof(int) * KEYS); - keys = malloc(sizeof(uint64_t *) * KEYS); - values = malloc(sizeof(uint64_t *) * KEYS); - secondary_key_lens = malloc(sizeof(int *) * KEYS); - secondary_keys = malloc(sizeof(uint64_t **) * KEYS); - memset(secondary_keys, 0, sizeof(uint64_t **) * KEYS); - - /* Secondary key entries */ - //Create the secondary global index - secondary_index = create_global_index(md, 2, SECONDARY_SLICE_SIZE, LEVELDB, - MDHIM_LONG_INT_KEY, NULL); - /* Primary key and secondary key entries */ - MPI_Barrier(MPI_COMM_WORLD); - total = 0; - - for (i = 0; i < KEYS; i++) { - num_keys[i] = 1; - } - while (total != TOTAL_KEYS) { - //Populate the primary keys and values to insert - gen_keys_values(md->mdhim_rank, total); - secondary_info = mdhimCreateSecondaryBulkInfo(secondary_index, - (void ***) secondary_keys, - secondary_key_lens, num_keys, - SECONDARY_GLOBAL_INFO); - //record the start time - start_record(&start_tv); - //Insert the primary keys into MDHIM - brm = mdhimBPut(md, (void **) keys, key_lens, - (void **) values, value_lens, KEYS, NULL, - secondary_info); - //Record the end time - end_record(&end_tv); - //Add the final time - add_time(&start_tv, &end_tv, &put_time); - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - free_key_values(); - 
total += KEYS; - mdhimReleaseSecondaryBulkInfo(secondary_info); - } - - /* End primary key and secondary key entries */ - - MPI_Barrier(MPI_COMM_WORLD); - /* End secondary key entries */ - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Retrieve the primary key's values from the secondary key - total = 0; - - while (total != TOTAL_KEYS) { - //Populate the keys and values to retrieve - gen_keys_values(md->mdhim_rank, total); - start_record(&start_tv); - //Get the values back for each key inserted - for (i = 0; i < KEYS; i++) { - bgrm = mdhimBGet(md, secondary_index, (void **) secondary_keys[i], - secondary_key_lens[i], - 1, MDHIM_GET_PRIMARY_EQ); - } - end_record(&end_tv); - add_time(&start_tv, &end_tv, &get_time); - bgrmp = bgrm; - while (bgrmp) { - if (!bgrmp || bgrmp->error) { - printf("Rank: %d - Error retrieving values starting at: %llu", - md->mdhim_rank, (long long unsigned int) *keys[0]); - } - - //Validate that the data retrieved is the correct data - for (i = 0; i < bgrmp->num_keys && !bgrmp->error; i++) { - if (!bgrmp->value_lens[i]) { - printf("Rank: %d - Got an empty value for key: %llu", - md->mdhim_rank, *(long long unsigned int *)bgrmp->keys[i]); - continue; - } - } - - bgrm = bgrmp; - bgrmp = bgrmp->next; - //Free the message received - mdhim_full_release_msg(bgrm); - } - - free_key_values(); - total += KEYS; - } - - free(key_lens); - free(keys); - free(values); - free(value_lens); - free(secondary_key_lens); - free(secondary_keys); - MPI_Barrier(MPI_COMM_WORLD); - - //Quit MDHIM - ret = mdhimClose(md); - gettimeofday(&end_tv, NULL); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took: %Lf seconds to put %d keys\n", - put_time, TOTAL_KEYS * 2); - printf("Took: %Lf seconds to get %d keys/values\n", - get_time, 
TOTAL_KEYS * 2); - - return 0; -} diff --git a/meta/tests/single_tests/bput-bget_secondary_local.c b/meta/tests/single_tests/bput-bget_secondary_local.c deleted file mode 100644 index 50cd3b459..000000000 --- a/meta/tests/single_tests/bput-bget_secondary_local.c +++ /dev/null @@ -1,260 +0,0 @@ -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 10000 -#define TOTAL_KEYS 10000 -#define SLICE_SIZE 100000 -#define SECONDARY_SLICE_SIZE 10000 -#define PRIMARY 1 -#define SECONDARY 2 - -uint64_t **keys; -int *key_lens; -uint64_t **values; -int *value_lens; -uint64_t ***secondary_keys; -int **secondary_key_lens; - -void start_record(struct timeval *start) { - gettimeofday(start, NULL); -} - -void end_record(struct timeval *end) { - gettimeofday(end, NULL); -} - -void add_time(struct timeval *start, struct timeval *end, long double *time) { - long double elapsed = (long double) (end->tv_sec - start->tv_sec) + - ((long double) (end->tv_usec - start->tv_usec)/1000000.0); - *time += elapsed; -} - -void gen_keys_values(int rank, int total_keys) { - int i = 0; - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(uint64_t)); - *keys[i] = i + (uint64_t) ((uint64_t) rank * (uint64_t)TOTAL_KEYS) + total_keys; - /* If we are generating keys for the secondary index, then they should be distributed differently - across the range servers */ - secondary_keys[i] = malloc(sizeof(uint64_t *)); - *secondary_keys[i] = malloc(sizeof(uint64_t)); - **secondary_keys[i] = i + rank; - key_lens[i] = sizeof(uint64_t); - secondary_key_lens[i] = malloc(sizeof(uint64_t)); - *secondary_key_lens[i] = sizeof(uint64_t); - values[i] = malloc(sizeof(uint64_t)); - value_lens[i] = sizeof(uint64_t); - *values[i] = rank; - //The secondary key's values should be the primary key they refer to - } -} - -void free_key_values() { - int i; - - for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - free(*secondary_keys[i]); - free(secondary_keys[i]); - 
free(secondary_key_lens[i]); - } -} - -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - int total = 0; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - struct timeval start_tv, end_tv; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_DBG; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - long double put_time = 0; - long double get_time = 0; - struct index_t *secondary_local_index; - struct secondary_bulk_info *secondary_info; - int num_keys[KEYS]; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 4); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - gettimeofday(&start_tv, NULL); - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - key_lens = malloc(sizeof(int) * KEYS); - value_lens = malloc(sizeof(int) * KEYS); - keys = malloc(sizeof(uint64_t *) * KEYS); - values = malloc(sizeof(uint64_t *) * KEYS); - secondary_key_lens = malloc(sizeof(int *) * KEYS); - secondary_keys = malloc(sizeof(uint64_t **) * KEYS); - 
memset(secondary_keys, 0, sizeof(uint64_t **) * KEYS); - - /* Primary and secondary key entries */ - MPI_Barrier(MPI_COMM_WORLD); - total = 0; - secondary_local_index = create_local_index(md, LEVELDB, - MDHIM_LONG_INT_KEY, NULL); - for (i = 0; i < KEYS; i++) { - num_keys[i] = 1; - } - while (total != TOTAL_KEYS) { - //Populate the primary keys and values to insert - gen_keys_values(md->mdhim_rank, total); - secondary_info = mdhimCreateSecondaryBulkInfo(secondary_local_index, - (void ***) secondary_keys, - secondary_key_lens, num_keys, - SECONDARY_LOCAL_INFO); - //record the start time - start_record(&start_tv); - //Insert the primary keys into MDHIM - brm = mdhimBPut(md, (void **) keys, key_lens, - (void **) values, value_lens, KEYS, - NULL, secondary_info); - //Record the end time - end_record(&end_tv); - //Add the final time - add_time(&start_tv, &end_tv, &put_time); - if (!brm || brm->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brm) { - if (brm->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brmp = brm->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - free_key_values(); - mdhimReleaseSecondaryBulkInfo(secondary_info); - total += KEYS; - } - - /* End primary and secondary entries */ - - - MPI_Barrier(MPI_COMM_WORLD); - /* End secondary key entries */ - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - //Retrieve the primary key's values from the secondary key - total = 0; - while (total != TOTAL_KEYS) { - 
//Populate the keys and values to retrieve - gen_keys_values(md->mdhim_rank, total); - start_record(&start_tv); - //Get the values back for each key inserted - for (i = 0; i < KEYS; i++) { - bgrm = mdhimBGet(md, secondary_local_index, - (void **) secondary_keys[i], secondary_key_lens[i], - 1, MDHIM_GET_PRIMARY_EQ); - } - - end_record(&end_tv); - add_time(&start_tv, &end_tv, &get_time); - while (bgrm) { - /* if (!bgrm || bgrm->error) { - printf("Rank: %d - Error retrieving values starting at: %llu", - md->mdhim_rank, (long long unsigned int) *keys[0]); - } */ - - //Validate that the data retrieved is the correct data - for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) { - if (!bgrm->value_lens[i]) { - printf("Rank: %d - Got an empty value for key: %llu", - md->mdhim_rank, *(long long unsigned int *)bgrm->keys[i]); - continue; - } - } - - bgrmp = bgrm; - bgrm = bgrm->next; - mdhim_full_release_msg(bgrmp); - } - - free_key_values(); - total += KEYS; - } - - free(key_lens); - free(keys); - free(values); - free(value_lens); - free(secondary_key_lens); - free(secondary_keys); - MPI_Barrier(MPI_COMM_WORLD); - - //Quit MDHIM - ret = mdhimClose(md); - gettimeofday(&end_tv, NULL); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took: %Lf seconds to put %d keys\n", - put_time, TOTAL_KEYS * 2); - printf("Took: %Lf seconds to get %d keys/values\n", - get_time, TOTAL_KEYS * 2); - - return 0; -} diff --git a/meta/tests/single_tests/bput-bgetn.c b/meta/tests/single_tests/bput-bgetn.c deleted file mode 100644 index 63976672e..000000000 --- a/meta/tests/single_tests/bput-bgetn.c +++ /dev/null @@ -1,220 +0,0 @@ -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 1000000 -#define TOTAL_KEYS 1000000 -#define SLICE_SIZE 1000000 - -uint64_t **keys; -int *key_lens; -uint64_t **values; -int *value_lens; - -void start_record(struct timeval *start) { - 
gettimeofday(start, NULL); -} - -void end_record(struct timeval *end) { - gettimeofday(end, NULL); -} - -void add_time(struct timeval *start, struct timeval *end, long double *time) { - long double elapsed = (long double) (end->tv_sec - start->tv_sec) + - ((long double) (end->tv_usec - start->tv_usec)/1000000.0); - *time += elapsed; -} - -void gen_keys_values(int rank, int total_keys) { - int i = 0; - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(uint64_t)); - *keys[i] = i + (uint64_t) ((uint64_t) rank * (uint64_t)TOTAL_KEYS) + total_keys; - key_lens[i] = sizeof(uint64_t); - values[i] = malloc(sizeof(int)); - *values[i] = 1; - value_lens[i] = sizeof(int); - } -} - -void free_key_values() { - int i; - - for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - } -} - -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm; - struct timeval start_tv, end_tv; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; // (data_store.h) - int size; - long double flush_time = 0; - long double put_time = 0; - long double get_time = 0; - int total_keys = 0; - int round = 0; - char *paths[] = {"./"}; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_paths(db_opts, paths, 1); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 1); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with 
threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - key_lens = malloc(sizeof(uint64_t) * KEYS); - value_lens = malloc(sizeof(int) * KEYS); - keys = malloc(sizeof(uint64_t *) * KEYS); - values = malloc(sizeof(int *) * KEYS); - MPI_Comm_size(md->mdhim_comm, &size); - MPI_Barrier(MPI_COMM_WORLD); - //record the start time - start_record(&start_tv); - while (total_keys != TOTAL_KEYS) { - //Populate the keys and values to insert - gen_keys_values(md->mdhim_rank, total_keys); - //Insert the keys into MDHIM - brm = mdhimBPut(md, (void **) keys, key_lens, - (void **) values, value_lens, KEYS, - NULL, NULL); - // MPI_Barrier(MPI_COMM_WORLD); - - //Iterate through the return messages to see if there is an error and to free it - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brm = brmp; - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - - } - - free_key_values(); - total_keys += KEYS; - round++; - } - - //Record the end time - end_record(&end_tv); - //Add the final time - add_time(&start_tv, &end_tv, &put_time); - MPI_Barrier(MPI_COMM_WORLD); - //Get the stats - start_record(&start_tv); - ret = mdhimStatFlush(md, md->primary_index); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - add_time(&start_tv, &end_tv, &flush_time); - - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats from MDHIM database\n"); - } else { -// printf("Got stats\n"); - } - - total_keys = 0; - while (total_keys != TOTAL_KEYS) { 
- //Populate the keys and values we expect to retrieve - gen_keys_values(md->mdhim_rank, total_keys); - start_record(&start_tv); - //Get the keys and values back starting from and including key[0] - bgrm = mdhimBGetOp(md, md->primary_index, - keys[0], sizeof(uint64_t), - KEYS, MDHIM_GET_NEXT); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - add_time(&start_tv, &end_tv, &get_time); - //Check if there is an error - if (!bgrm || bgrm->error) { - printf("Rank: %d - Error retrieving values starting at: %llu", - md->mdhim_rank, (long long unsigned int) *keys[0]); - goto done; - } - - //Validate that the data retrieved is the correct data - for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) { - assert(*(uint64_t *)bgrm->keys[i] == *keys[i]); - assert(*(int *)bgrm->values[i] == *values[i]); - } - - //Free the message received - mdhim_full_release_msg(bgrm); - free_key_values(); - total_keys += KEYS; - round++; - } - - free(key_lens); - free(keys); - free(values); - free(value_lens); -done: - MPI_Barrier(MPI_COMM_WORLD); - //Quit MDHIM - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - gettimeofday(&end_tv, NULL); - - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - printf("Took: %Lf seconds to put %d keys\n", - put_time, TOTAL_KEYS); - printf("Took: %Lf seconds to get %d keys/values\n", - get_time, TOTAL_KEYS); - printf("Took: %Lf seconds to stat flush\n", - flush_time); - - return 0; -} diff --git a/meta/tests/single_tests/bput-bgetp.c b/meta/tests/single_tests/bput-bgetp.c deleted file mode 100644 index 5750d4426..000000000 --- a/meta/tests/single_tests/bput-bgetp.c +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 100000 -#define TOTAL_KEYS 100000 - -int **keys; -int *key_lens; -int **values; -int *value_lens; - -void start_record(struct timeval *start) { - gettimeofday(start, NULL); -} - -void 
end_record(struct timeval *end) { - gettimeofday(end, NULL); -} - -void add_time(struct timeval *start, struct timeval *end, long *time) { - *time += end->tv_sec - start->tv_sec; -} - -void gen_keys_values(int rank, int total_keys) { - int i = 0; - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(int)); - //Keys are chosen to fit in one slice - *keys[i] = i + (rank * TOTAL_KEYS) + total_keys; - key_lens[i] = sizeof(int); - values[i] = malloc(sizeof(int)); - *values[i] = *keys[i]; - value_lens[i] = sizeof(int); - } -} - -void free_key_values() { - int i; - - for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - } -} - -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm; - struct timeval start_tv, end_tv; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; // (data_store.h) - int size; - long flush_time = 0; - long put_time = 0; - long get_time = 0; - int total_keys = 0; - int round = 0; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if 
(!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - key_lens = malloc(sizeof(int) * KEYS); - value_lens = malloc(sizeof(int) * KEYS); - keys = malloc(sizeof(int *) * KEYS); - values = malloc(sizeof(int *) * KEYS); - MPI_Comm_size(md->mdhim_comm, &size); - while (total_keys != TOTAL_KEYS) { - //Populate the keys and values to insert - gen_keys_values(md->mdhim_rank, total_keys); - //record the start time - start_record(&start_tv); - //Insert the keys into MDHIM - brm = mdhimBPut(md, - (void **) keys, key_lens, - (void **) values, value_lens, - KEYS, NULL, NULL); - // MPI_Barrier(MPI_COMM_WORLD); - //record the end time - end_record(&end_tv); - //add the time - add_time(&start_tv, &end_tv, &put_time); - - //Iterate through the return messages to see if there is an error and to free it - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brm = brmp; - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - - } - - free_key_values(); - total_keys += KEYS; - round++; - } - - //Get the stats - start_record(&start_tv); - ret = mdhimStatFlush(md, md->primary_index); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - add_time(&start_tv, &end_tv, &flush_time); - - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats from MDHIM database\n"); - } else { -// printf("Got stats\n"); - } - - total_keys = 0; - while (total_keys != TOTAL_KEYS) { - //Populate the keys and values we expect to retrieve - gen_keys_values(md->mdhim_rank, total_keys); - start_record(&start_tv); - //Get the keys and values back starting from and including key[0] - bgrm = mdhimBGetOp(md, md->primary_index, - keys[KEYS - 1], sizeof(int), - KEYS, MDHIM_GET_PREV); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - 
add_time(&start_tv, &end_tv, &get_time); - //Check if there is an error - if (!bgrm) { - printf("Rank: %d - Empty bgrm: %d\n", md->mdhim_rank, *keys[KEYS-1]); - } - if (!bgrm || bgrm->error) { - printf("Rank: %d - Error retrieving values starting at: %d\n", md->mdhim_rank, *keys[KEYS-1]); - goto done; - } - - //Validate that the data retrieved is the correct data - for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) { - assert(*(int *)bgrm->keys[i] == *keys[KEYS - 1 - i]); - assert(*(int *)bgrm->values[i] == *values[KEYS - 1 - i]); - } - - //Free the message received - mdhim_full_release_msg(bgrm); - free_key_values(); - total_keys += KEYS; - round++; - } - - free(key_lens); - free(keys); - free(values); - free(value_lens); -done: - //Quit MDHIM - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - gettimeofday(&end_tv, NULL); - - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - printf("Took: %ld seconds to put %d keys\n", - put_time, TOTAL_KEYS); - printf("Took: %ld seconds to get %d keys/values\n", - get_time, TOTAL_KEYS); - printf("Took: %ld seconds to stat flush\n", - flush_time); - - - return 0; -} diff --git a/meta/tests/single_tests/grey_sort.c b/meta/tests/single_tests/grey_sort.c deleted file mode 100644 index 6bebe772f..000000000 --- a/meta/tests/single_tests/grey_sort.c +++ /dev/null @@ -1,169 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "db_options.h" - -#define KEYS 100000 -#define KEY_SIZE 100 -#define NUM_KEYS 10000000 -int main(int argc, char **argv) { - int ret; - int provided; - int i, j, fd; - struct mdhim_t *md; - void **keys; - int key_lens[KEYS]; - char **values; - int value_lens[KEYS]; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm; - struct timeval start_tv, end_tv; - char filename[255]; - mdhim_options_t *mdhim_opts; - struct mdhim_stat *stat, *tmp; - - //Initialize 
MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - mdhim_opts = mdhim_options_init(); - mdhim_options_set_db_path(mdhim_opts, "./"); - mdhim_options_set_db_name(mdhim_opts, "mdhimTstDB"); - mdhim_options_set_db_type(mdhim_opts, LEVELDB); - mdhim_options_set_key_type(mdhim_opts, MDHIM_BYTE_KEY); - - //Initialize MDHIM - md = mdhimInit(MPI_COMM_WORLD, mdhim_opts); - sprintf(filename, "%s%d", "/scratch/hng/input/input", md->mdhim_rank); - if ((fd = open(filename, O_RDONLY)) < 0) { - printf("Error opening input file"); - exit(1); - } - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - //Start the timing - gettimeofday(&start_tv, NULL); - - //Read in the keys from the files and insert them - for (j = 0; j < NUM_KEYS; j+=KEYS) { - //Populate the keys and values to insert - keys = malloc(sizeof(void *) * KEYS); - values = malloc(sizeof(char *) * KEYS); - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(KEY_SIZE); - ret = read(fd, keys[i], KEY_SIZE); - if (ret != KEY_SIZE) { - printf("Error reading in key\n"); - } - - key_lens[i] = KEY_SIZE; - values[i] = malloc(1); - *values[i] = 'a'; - value_lens[i] = 1; - } - - //Insert the keys into MDHIM - brm = mdhimBPut(md, (void **) keys, key_lens, - (void **) values, value_lens, KEYS); - brmp = brm; - if (!brm || brm->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - for (i = 0; i < KEYS; i++) { - 
free(keys[i]); - - } - free(keys); - for (i = 0; i < KEYS; i++) { - free(values[i]); - - } - free(values); - } - - close(fd); - ret = mdhimStatFlush(md); - if ((ret = im_range_server(md)) != 1) { - goto done; - } - - //Iterate through my range server stat hash entries to retrieve all the slices - HASH_ITER(hh, md->mdhim_rs->mdhim_store->mdhim_store_stats, stat, tmp) { - if (!stat) { - continue; - } - - bgrm = mdhimBGetOp(md, stat->min, KEY_SIZE, - KEYS, MDHIM_GET_NEXT); - //Check if there is an error - if (!bgrm || bgrm->error) { - printf("Rank: %d - Error retrieving values", md->mdhim_rank); - goto done; - } - - sprintf(filename, "%s%d", "/scratch/hng/output/output_slice_", stat->key); - - if ((fd = open(filename, O_WRONLY | O_CREAT)) < 0) { - printf("Error opening output file"); - exit(1); - } - - for (i = 0; i < bgrm->num_records && !bgrm->error; i++) { - ret = write(fd, bgrm->keys[i], KEY_SIZE); - if (ret != KEY_SIZE) { - printf("Error writing key\n"); - } - } - - close(fd); - //Free the message received - mdhim_full_release_msg(bgrm); - } - - gettimeofday(&end_tv, NULL); - -done: - MPI_Barrier(MPI_COMM_WORLD); - //Quit MDHIM - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - printf("Took: %u seconds to sort and output %u keys/values\n", - (uint32_t) (end_tv.tv_sec - start_tv.tv_sec), NUM_KEYS); - return 0; -} diff --git a/meta/tests/single_tests/index_name.c b/meta/tests/single_tests/index_name.c deleted file mode 100644 index 17d20985a..000000000 --- a/meta/tests/single_tests/index_name.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * ===================================================================================== - * - * Filename: index_name.c - * - * Description: Testing Index Naming - * - * Version: 1.0 - * Created: 11/26/2014 12:35:26 PM - * Revision: none - * Compiler: gcc - * - * Author: Aaron Caldwell (), caldwellfans@gmail.com - * Organization: - * - * 
===================================================================================== - */ - -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDAERY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided=0; - struct mdhim_t *md; - uint32_t key, key2, key3, **secondary_keys, **secondary_keys2, **secondary_keys3; - int value, *secondary_key_lens, *secondary_key_lens2, *secondary_key_lens3; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_local_index, *secondary_local_index2, *secondary_local_index3, *secondary_local_index_by_name; - struct secondary_info *secondary_info, *secondary_info2, *secondary_info3; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if ( provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode.\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimInxName"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if(!md) { - printf("ERROR initializing MDHIM\n"); - exit(1); - } - - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - - secondary_keys2 = malloc(sizeof(uint32_t *)); - secondary_keys2[0] = malloc(sizeof(uint32_t)); - *secondary_keys2[0] = md->mdhim_rank + 1; - secondary_key_lens2 = 
malloc(sizeof(int)); - secondary_key_lens2[0] = sizeof(uint32_t); - - secondary_keys3 = malloc(sizeof(uint32_t *)); - secondary_keys3[0] = malloc(sizeof(uint32_t)); - *secondary_keys3[0] = md->mdhim_rank + 1; - secondary_key_lens3 = malloc(sizeof(int)); - secondary_key_lens3[0] = sizeof(uint32_t); - - //Create a secondary index on only one range server - secondary_local_index = create_local_index(md, LEVELDB, MDHIM_INT_KEY, - "Test1"); - secondary_local_index2 = create_local_index(md, LEVELDB, MDHIM_INT_KEY, - "Test2"); - secondary_local_index3 = create_local_index(md, LEVELDB, MDHIM_INT_KEY, - "Test3"); - - secondary_info = mdhimCreateSecondaryInfo(secondary_local_index, - (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_LOCAL_INFO); - - secondary_info2 = mdhimCreateSecondaryInfo(secondary_local_index2, - (void **) secondary_keys2, - secondary_key_lens2, 1, - SECONDARY_LOCAL_INFO); - - secondary_info3 = mdhimCreateSecondaryInfo(secondary_local_index3, - (void **) secondary_keys3, - secondary_key_lens3, 1, - SECONDARY_LOCAL_INFO); - - - brm = mdhimPut(md, &key, sizeof(key), &value, sizeof(value), NULL, secondary_info); - - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - // Release the received message - mdhim_full_release_msg(brm); - - // Insert a new key with the second secondary key - key2 = 200 * (md->mdhim_rank + 1); - brm = mdhimPut(md, &key2, sizeof(key2), &value, sizeof(value), NULL, secondary_info2); - - if (!brm || brm->error) { - printf("Secondary Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - // Release the received message - mdhim_full_release_msg(brm); - - // Insert a new key with the third secondary key - key3 = 300 * (md->mdhim_rank + 1); - brm = mdhimPut(md, &key3, sizeof(key3), &value, sizeof(value), NULL, secondary_info3); - - if (!brm || brm->error) { - 
printf("Secondary Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - // Release the received message - mdhim_full_release_msg(brm); - - // Commit the database - ret = mdhimCommit(md, md->primary_index); - if(ret != MDHIM_SUCCESS) { - printf("Error commiting MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - // Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index); - if(ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - // Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index2); - if(ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - // Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index3); - if(ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - secondary_local_index_by_name = NULL; - - MPI_Barrier(MPI_COMM_WORLD); - - printf("[RANK %d] - Attempting to get secondary_local_index '%s'\n", md->mdhim_rank, secondary_local_index->name); - secondary_local_index_by_name = get_index_by_name(md,secondary_local_index->name); - if(secondary_local_index_by_name && strcmp(secondary_local_index_by_name->name,secondary_local_index->name)==0) { - printf("[RANK %d] - Found secondary_local_index %s\n",md->mdhim_rank, secondary_local_index_by_name->name); - } else { - printf("[RANK %d] - Error finding secondary_local_index by name '%s'\n", md->mdhim_rank, secondary_local_index->name); - } - - // Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - &secondary_keys[0][0], - secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - - if(!bgrm || bgrm->error) { - 
printf("Error getting value for key: %d from MDHIM\n", key); - }else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - - printf("[RANK %d] - Attempting to get secondary_local_index '%s'\n", md->mdhim_rank, secondary_local_index2->name); - secondary_local_index_by_name = get_index_by_name(md,secondary_local_index2->name); - if(secondary_local_index_by_name && strcmp(secondary_local_index_by_name->name,secondary_local_index2->name)==0) { - printf("[RANK %d] - Found secondary_local_index %s\n",md->mdhim_rank, secondary_local_index_by_name->name); - } else { - printf("[RANK %d] - Error finding secondary_local_index by name '%s'\n", md->mdhim_rank, secondary_local_index2->name); - } - - // Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - &secondary_keys[0][0], - secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - - if(!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - }else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - - printf("[RANK %d] - Attempting to get secondary_local_index '%s'\n", md->mdhim_rank, secondary_local_index3->name); - secondary_local_index_by_name = get_index_by_name(md,secondary_local_index3->name); - - if(secondary_local_index_by_name && strcmp(secondary_local_index_by_name->name,secondary_local_index3->name)==0) { - printf("[RANK %d] - Found secondary_local_index %s\n",md->mdhim_rank, secondary_local_index_by_name->name); - } else { - printf("[RANK %d] - Error finding secondary_local_index by name '%s'\n", md->mdhim_rank, secondary_local_index3->name); - } - - // Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - &secondary_keys[0][0], - secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - 
- if(!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - }else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - - - ret = mdhimClose(md); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - free(secondary_keys2[0]); - free(secondary_keys2); - free(secondary_key_lens2); - free(secondary_keys3[0]); - free(secondary_keys3); - free(secondary_key_lens3); - mdhim_options_destroy(db_opts); - mdhimReleaseSecondaryInfo(secondary_info); - mdhimReleaseSecondaryInfo(secondary_info2); - mdhimReleaseSecondaryInfo(secondary_info3); - - if(ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/mdhim_manifest_1_0_1 b/meta/tests/single_tests/mdhim_manifest_1_0_1 deleted file mode 100644 index 6d83f083f..000000000 Binary files a/meta/tests/single_tests/mdhim_manifest_1_0_1 and /dev/null differ diff --git a/meta/tests/single_tests/plfs-bput-bgetn.c b/meta/tests/single_tests/plfs-bput-bgetn.c deleted file mode 100644 index 79bacb5a7..000000000 --- a/meta/tests/single_tests/plfs-bput-bgetn.c +++ /dev/null @@ -1,219 +0,0 @@ -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define KEYS 1000000 -//#define TOTAL_KEYS 2083334 -#define TOTAL_KEYS 1000000 -#define SLICE_SIZE 1000000 - -uint64_t **keys; -int *key_lens; -uint64_t **values; -int *value_lens; - -void start_record(struct timeval *start) { - gettimeofday(start, NULL); -} - -void end_record(struct timeval *end) { - gettimeofday(end, NULL); -} - -void add_time(struct timeval *start, struct timeval *end, long double *time) { - long double elapsed = (long double) (end->tv_sec - start->tv_sec) + - ((long double) (end->tv_usec - start->tv_usec)/1000000.0); - *time += elapsed; -} - -void gen_keys_values(int rank, int total_keys) { - 
int i = 0; - for (i = 0; i < KEYS; i++) { - keys[i] = malloc(sizeof(uint64_t)); - *keys[i] = i + (uint64_t) ((uint64_t) rank * (uint64_t)TOTAL_KEYS) + total_keys; - key_lens[i] = sizeof(uint64_t); - values[i] = malloc(sizeof(int)); - *values[i] = 1; - value_lens[i] = sizeof(int); - } -} - -void free_key_values() { - int i; - - for (i = 0; i < KEYS; i++) { - free(keys[i]); - free(values[i]); - } -} - -int main(int argc, char **argv) { - int ret; - int provided; - int i; - struct mdhim_t *md; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm; - struct timeval start_tv, end_tv; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; //MLOG_CRIT=1, MLOG_DBG=2 - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; // (data_store.h) - int size; - long double flush_time = 0; - long double put_time = 0; - long double get_time = 0; - int total_keys = 0; - int round = 0; - char *paths[] = {"/tmp/"}; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_paths(db_opts, paths, 1); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 1); - mdhim_options_set_debug_level(db_opts, dbug); - - //Initialize MPI with multiple thread support - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - //Quit if MPI didn't initialize with multiple threads - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - 
key_lens = malloc(sizeof(uint64_t) * KEYS); - value_lens = malloc(sizeof(int) * KEYS); - keys = malloc(sizeof(uint64_t *) * KEYS); - values = malloc(sizeof(int *) * KEYS); - MPI_Comm_size(md->mdhim_comm, &size); - MPI_Barrier(MPI_COMM_WORLD); - //record the start time - start_record(&start_tv); - while (total_keys != TOTAL_KEYS) { - //Populate the keys and values to insert - gen_keys_values(md->mdhim_rank, total_keys); - //Insert the keys into MDHIM - brm = mdhimBPut(md, md->primary_index, (void **) keys, key_lens, - (void **) values, value_lens, KEYS); - // MPI_Barrier(MPI_COMM_WORLD); - - //Iterate through the return messages to see if there is an error and to free it - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brm = brmp; - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - - } - - free_key_values(); - total_keys += KEYS; - round++; - } - - //Record the end time - end_record(&end_tv); - //Add the final time - add_time(&start_tv, &end_tv, &put_time); - MPI_Barrier(MPI_COMM_WORLD); - //Get the stats - start_record(&start_tv); - ret = mdhimStatFlush(md, md->primary_index); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - add_time(&start_tv, &end_tv, &flush_time); - - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats from MDHIM database\n"); - } else { -// printf("Got stats\n"); - } - - total_keys = 0; - while (total_keys != TOTAL_KEYS) { - //Populate the keys and values we expect to retrieve - gen_keys_values(md->mdhim_rank, total_keys); - start_record(&start_tv); - //Get the keys and values back starting from and including key[0] - bgrm = mdhimBGetOp(md, md->primary_index, keys[0], sizeof(uint64_t), - KEYS, MDHIM_GET_NEXT); - // MPI_Barrier(MPI_COMM_WORLD); - end_record(&end_tv); - add_time(&start_tv, &end_tv, 
&get_time); - //Check if there is an error - if (!bgrm || bgrm->error) { - printf("Rank: %d - Error retrieving values starting at: %llu", - md->mdhim_rank, (long long unsigned int) *keys[0]); - goto done; - } - - //Validate that the data retrieved is the correct data - for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) { - assert(*(uint64_t *)bgrm->keys[i] == *keys[i]); - assert(*(int *)bgrm->values[i] == *values[i]); - } - - //Free the message received - mdhim_full_release_msg(bgrm); - free_key_values(); - total_keys += KEYS; - round++; - } - - free(key_lens); - free(keys); - free(values); - free(value_lens); -done: - MPI_Barrier(MPI_COMM_WORLD); - //Quit MDHIM - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - gettimeofday(&end_tv, NULL); - - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - printf("Took: %Lf seconds to put %d keys\n", - put_time, TOTAL_KEYS); - printf("Took: %Lf seconds to get %d keys/values\n", - get_time, TOTAL_KEYS); - printf("Took: %Lf seconds to stat flush\n", - flush_time); - - return 0; -} diff --git a/meta/tests/single_tests/plfs-put-get.c b/meta/tests/single_tests/plfs-put-get.c deleted file mode 100644 index d32d76465..000000000 --- a/meta/tests/single_tests/plfs-put-get.c +++ /dev/null @@ -1,165 +0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 1 - -struct plfs_record { - unsigned long long int logical_offset; - unsigned long long int size; - char dropping_file[PATH_MAX]; - unsigned long long int physical_offset; -}; - -FILE *open_output(int rank) { - FILE *file; - char rank_str[4]; - char file_str[4]; - char char_str[2]; - int i, j; - char filename[PATH_MAX]; - - //Opens the file and coverts the rank to characters - sprintf(rank_str, "%d", rank); - memset(file_str, 0, 4); - memset(file_str, 'a', 3); - j = strlen(file_str) - 1; - for (i = strlen(rank_str) - 1; i >= 0; i--) { - 
sprintf(char_str, "%c", rank_str[i]); - file_str[j] = strtol(char_str, NULL, 10) + 'a'; - j--; - } - - sprintf(filename, "plfs-output/plfs%s", file_str); - printf("file string is: %s\n", filename); - file = fopen(filename, "r"); - if (!file) { - printf("Error opening the input file"); - } - - return file; -} - -struct plfs_record *parse_input(FILE *file) { - struct plfs_record *rec; - int ret; - - rec = malloc(sizeof(struct plfs_record)); - ret = fscanf(file, "%llu %llu %s %llu", &rec->logical_offset, &rec->size, - rec->dropping_file, &rec->physical_offset); - if (!ret || ret == EOF) { - printf("Error parsing file\n"); - exit(1); - } - - printf("Parsed record with logical_offset: %llu, size: %llu, dropping_file: %s," - " physical_offset: %llu\n", - rec->logical_offset, rec->size, - rec->dropping_file, rec->physical_offset); - - return rec; -} - -long long get_key(unsigned long long int lo) { - unsigned long long int ret = ((unsigned long long int) lo/SLICE_SIZE) * SLICE_SIZE; - return ret; -} - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct plfs_record *rec = NULL; - FILE *file; - unsigned long long int key; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - //Set MDHIM options - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_LONG_INT_KEY); - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 10); - 
mdhim_options_set_value_append(db_opts, 1); - - //Initialize MDHIM - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - file = open_output(md->mdhim_rank); - if (!file) { - printf("Error opening file\n"); - goto done; - } - - rec = parse_input(file); - if (!rec) { - printf("Error parsing file\n"); - goto done; - } - key = get_key(rec->logical_offset); - printf("Inserting key: %llu\n", key); - brm = mdhimPut(md, &key, sizeof(key), - rec, sizeof(struct plfs_record), NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - mdhim_full_release_msg(brm); - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - bgrm = mdhimGet(md, md->primary_index, &key, sizeof(key), - MDHIM_GET_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %llu from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - -done: - ret = mdhimClose(md); - free(rec); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-del.c b/meta/tests/single_tests/put-del.c deleted file mode 100644 index 8ff546abf..000000000 --- a/meta/tests/single_tests/put-del.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - int key; - int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - char *db_path = "./"; - char *db_name = "mdhimTstDB"; - int dbug = 
MLOG_CRIT; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; // (data_store.h) - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the keys and values - key = 20 * (md->mdhim_rank + 1); - value = 1000 * (md->mdhim_rank + 1); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - brm = mdhimDelete(md, md->primary_index, &key, sizeof(key)); - if (!brm || brm->error) { - printf("Error deleting key/value from MDHIM\n"); - } else { - printf("Successfully deleted key/value into MDHIM\n"); - } - - //Get the values - value = 0; - bgrm = mdhimGet(md, md->primary_index, &key, sizeof(key), MDHIM_GET_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-del_secondary.c 
b/meta/tests/single_tests/put-del_secondary.c deleted file mode 100644 index 781a8b078..000000000 --- a/meta/tests/single_tests/put-del_secondary.c +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the primary keys and secondary keys - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - //Create a secondary index - secondary_index = create_global_index(md, 2, - SECONDARY_SLICE_SIZE, LEVELDB, - MDHIM_INT_KEY, NULL); - secondary_info = mdhimCreateSecondaryInfo(secondary_index, - (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_GLOBAL_INFO); 
- brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - secondary_info, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - brm = mdhimDelete(md, secondary_index, secondary_keys[0], secondary_key_lens[0]); - if (!brm || brm->error) { - printf("Error deleting key/value from MDHIM\n"); - } else { - printf("Successfully deleted key/value into MDHIM\n"); - } - - //Get the primary key values from the secondary key - this should fail - value = 0; - bgrm = mdhimGet(md, secondary_index, secondary_keys[0], secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-del_secondary_local.c b/meta/tests/single_tests/put-del_secondary_local.c deleted file mode 100644 index 1b12d4416..000000000 --- a/meta/tests/single_tests/put-del_secondary_local.c +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - int value, 
*secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_local_index; - struct secondary_info *secondary_local_info; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the primary keys and secondary keys - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - - //Create a secondary index - secondary_local_index = create_local_index(md, MDHIM_INT_KEY, - md->primary_index->id, NULL); - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - secondary_local_info = mdhimCreateSecondaryInfo(secondary_local_index, - (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_LOCAL_INFO); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, secondary_local_info); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - mdhimReleaseSecondaryInfo(secondary_local_info); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if 
(ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - brm = mdhimDelete(md, secondary_local_index, secondary_keys[0], - secondary_key_lens[0]); - if (!brm || brm->error) { - printf("Error deleting key/value from MDHIM\n"); - } else { - printf("Successfully deleted key/value into MDHIM\n"); - } - - //Get the primary key values from the secondary key - this should fail - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - secondary_keys[0], secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-get.c b/meta/tests/single_tests/put-get.c deleted file mode 100644 index d3c9bc390..000000000 --- a/meta/tests/single_tests/put-get.c +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - int key; - int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - 
mdhim_options_set_db_name(db_opts, "mdhim"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - mdhim_options_set_server_factor(db_opts, 1); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the keys and values - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - mdhim_full_release_msg(brm); - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the values - value = 0; - bgrm = mdhimGet(md, md->primary_index, &key, sizeof(key), MDHIM_GET_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-get_2secondary.c b/meta/tests/single_tests/put-get_2secondary.c deleted file mode 100644 index a843dff17..000000000 --- a/meta/tests/single_tests/put-get_2secondary.c +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, secondary_key; - 
int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_local_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the primary keys and values - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - - //Create a secondary index on only one range server - secondary_local_index = create_local_index(md, LEVELDB, - MDHIM_INT_KEY); - secondary_info = mdhimCreateSecondaryInfo(NULL, NULL, 0, - secondary_local_index, &secondary_key, - sizeof(secondary_key)); - secondary_info2 = mdhimCreateSecondaryInfo(NULL, NULL, 0, - secondary_local_index, &secondary_key2, - sizeof(secondary_key2)); - secondary_key = md->mdhim_rank + 1; - brm = mdhimPut(md, - &key, sizeof(key), - &value, sizeof(value), secondary_info); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the secondary 
index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, &secondary_key, sizeof(secondary_key), - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-get_2secondary_local.c b/meta/tests/single_tests/put-get_2secondary_local.c deleted file mode 100644 index 6ecb7b52c..000000000 --- a/meta/tests/single_tests/put-get_2secondary_local.c +++ /dev/null @@ -1,173 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, key2, **secondary_keys, **secondary_keys2; - int value, *secondary_key_lens, *secondary_key_lens2; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_local_index, *secondary_local_index2; - struct secondary_info *secondary_info, *secondary_info2; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - 
mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the primary keys and values - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - - secondary_keys2 = malloc(sizeof(uint32_t *)); - secondary_keys2[0] = malloc(sizeof(uint32_t)); - *secondary_keys2[0] = md->mdhim_rank + 1; - secondary_key_lens2 = malloc(sizeof(int)); - secondary_key_lens2[0] = sizeof(uint32_t); - - //Create a secondary index on only one range server - secondary_local_index = create_local_index(md, LEVELDB, - MDHIM_INT_KEY, NULL); - secondary_local_index2 = create_local_index(md, LEVELDB, - MDHIM_INT_KEY, NULL); - secondary_info = mdhimCreateSecondaryInfo(secondary_local_index, - (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_LOCAL_INFO); - secondary_info2 = mdhimCreateSecondaryInfo(secondary_local_index2, - (void **) secondary_keys2, - secondary_key_lens2, 1, - SECONDARY_LOCAL_INFO); - brm = mdhimPut(md, - &key, sizeof(key), - &value, sizeof(value), - NULL, secondary_info); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Insert a new key with the second secondary key - key2 = 200 * (md->mdhim_rank + 1); - brm = mdhimPut(md, - &key2, sizeof(key2), - &value, sizeof(value), - NULL, secondary_info2); - if (!brm || 
brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - //Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index2); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - secondary_keys[0], - secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - - //Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index2, secondary_keys2[0], - secondary_key_lens2[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - - ret = mdhimClose(md); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - free(secondary_keys2[0]); - free(secondary_keys2); - free(secondary_key_lens2); - mdhim_options_destroy(db_opts); - 
mdhimReleaseSecondaryInfo(secondary_info); - mdhimReleaseSecondaryInfo(secondary_info2); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-get_secondary.c b/meta/tests/single_tests/put-get_secondary.c deleted file mode 100644 index ef27c2027..000000000 --- a/meta/tests/single_tests/put-get_secondary.c +++ /dev/null @@ -1,134 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - uint32_t skey; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Set the primary keys and values - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - //Set the secondary keys and values - secondary_keys = malloc(sizeof(uint32_t *)); - - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_key_lens = 
malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - - //Create the secondary remote index - secondary_index = create_global_index(md, 2, SECONDARY_SLICE_SIZE, LEVELDB, - MDHIM_INT_KEY, NULL); - //Create the secondary info struct - secondary_info = mdhimCreateSecondaryInfo(secondary_index, - (void **) secondary_keys, - secondary_key_lens, - 1, SECONDARY_GLOBAL_INFO); - //Put the primary and secondary keys - brm = mdhimPut(md, - &key, sizeof(key), - &value, sizeof(value), - secondary_info, - NULL); - - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - mdhim_full_release_msg(brm); - - //Put another secondary key - skey = 2 * (md->mdhim_rank + 1); - brm = mdhimPutSecondary(md, - secondary_index, - /*Secondary key */ - &skey, sizeof(skey), - /* Primary key */ - &key, sizeof(key)); - - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the primary key values from the secondary key - value = 0; - - bgrm = mdhimGet(md, secondary_index, - secondary_keys[0], secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - MPI_Barrier(MPI_COMM_WORLD); - 
MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-get_secondary_local.c b/meta/tests/single_tests/put-get_secondary_local.c deleted file mode 100644 index be01f2f29..000000000 --- a/meta/tests/single_tests/put-get_secondary_local.c +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -#define SLICE_SIZE 100 -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - struct index_t *secondary_local_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); //Key_type = 1 (int) - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the primary keys and values - key = 100 * (md->mdhim_rank + 1); - value = 500 * (md->mdhim_rank + 1); - - //Create a secondary index on only one range server - secondary_local_index = create_local_index(md, LEVELDB, - MDHIM_INT_KEY, NULL); - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + 1; - secondary_keys[1] = malloc(sizeof(uint32_t)); - *secondary_keys[1] = md->mdhim_rank + 2; - secondary_key_lens = 
malloc(sizeof(int) * 2); - secondary_key_lens[0] = sizeof(uint32_t); - secondary_key_lens[1] = sizeof(uint32_t); - secondary_info = mdhimCreateSecondaryInfo(secondary_local_index, - (void **) secondary_keys, - secondary_key_lens, 2, - SECONDARY_LOCAL_INFO); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, secondary_info); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Release the received message - mdhim_full_release_msg(brm); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the secondary index so the client figures out who to query - ret = mdhimStatFlush(md, secondary_local_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the primary key values from the secondary local key - value = 0; - bgrm = mdhimGet(md, secondary_local_index, - &secondary_keys[0][0], - secondary_key_lens[0], - MDHIM_GET_PRIMARY_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->value_lens[0]) { - printf("Successfully got value: %d from MDHIM\n", *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - mdhimReleaseSecondaryInfo(secondary_info); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - free(secondary_keys[0]); - free(secondary_keys[1]); - free(secondary_keys); - free(secondary_key_lens); - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-getn.c b/meta/tests/single_tests/put-getn.c deleted file mode 100644 index a66f83c0f..000000000 --- a/meta/tests/single_tests/put-getn.c +++ /dev/null @@ -1,117 
+0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - int key; - int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - int i; - int keys_per_rank = 100; - char *db_path = "./"; - char *db_name = "mdhimTstDB"; - int dbug = MLOG_CRIT; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - struct timeval start_tv, end_tv; - unsigned totaltime; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - gettimeofday(&start_tv, NULL); - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the keys and values - for (i = 0; i < keys_per_rank; i++) { - key = keys_per_rank * md->mdhim_rank + i; - value = md->mdhim_rank + i; - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Rank: %d put key: %d with value: %d\n", md->mdhim_rank, key, value); - } - - mdhim_full_release_msg(brm); - } - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats - ret = 
mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the values using get_next - for (i = 0; i < keys_per_rank; i++) { - value = 0; - key = keys_per_rank * md->mdhim_rank + i - 1; - bgrm = mdhimBGetOp(md, md->primary_index, - &key, sizeof(int), 1, MDHIM_GET_NEXT); - if (!bgrm || bgrm->error) { - printf("Rank: %d, Error getting next key/value given key: %d from MDHIM\n", - md->mdhim_rank, key); - } else if (bgrm->keys[0] && bgrm->values[0]) { - printf("Rank: %d successfully got key: %d with value: %d from MDHIM\n", - md->mdhim_rank, - *((int *) bgrm->keys[0]), - *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - } - - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - gettimeofday(&end_tv, NULL); - totaltime = end_tv.tv_sec - start_tv.tv_sec; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took %u seconds to insert and retrieve %d keys/values\n", totaltime, - keys_per_rank); - - return 0; -} diff --git a/meta/tests/single_tests/put-getn_secondary.c b/meta/tests/single_tests/put-getn_secondary.c deleted file mode 100644 index a83b5aae6..000000000 --- a/meta/tests/single_tests/put-getn_secondary.c +++ /dev/null @@ -1,150 +0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define SECONDARY_SLICE_SIZE 6 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - int i; - int keys_per_rank = 100; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_DBG; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - struct timeval start_tv, end_tv; - unsigned totaltime; - struct index_t 
*secondary_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - - gettimeofday(&start_tv, NULL); - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Create the secondary remote index - secondary_index = create_global_index(md, 2, SECONDARY_SLICE_SIZE, LEVELDB, - MDHIM_INT_KEY, NULL); - //Put the primary keys and values - for (i = 0; i < keys_per_rank; i++) { - key = keys_per_rank * md->mdhim_rank + i; - value = md->mdhim_rank + i; - //Create the secondary info struct - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + i; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - - secondary_info = mdhimCreateSecondaryInfo(secondary_index, - (void **) secondary_keys, - secondary_key_lens, - 1, SECONDARY_GLOBAL_INFO); - brm = mdhimPut(md, - &key, sizeof(key), - &value, sizeof(value), - secondary_info, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Rank: %d put secondary key: %u with value: %d\n", md->mdhim_rank, - *secondary_keys[0], key); - } - - mdhimReleaseSecondaryInfo(secondary_info); - mdhim_full_release_msg(brm); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - } - - 
//Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the primary index - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the stats for the secondary index - ret = mdhimStatFlush(md, secondary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the secondary keys and values using get_next - for (i = 1; i < keys_per_rank; i++) { - value = 0; - key = md->mdhim_rank + i - 1; - bgrm = mdhimBGetOp(md, secondary_index, - &key, sizeof(uint32_t), 1, MDHIM_GET_NEXT); - if (!bgrm || bgrm->error) { - printf("Rank: %d, Error getting next key/value given key: %d from MDHIM\n", - md->mdhim_rank, key); - } else if (bgrm->keys && bgrm->values) { - printf("Rank: %d successfully got key: %d with value: %d from MDHIM\n", - md->mdhim_rank, - *((int *) bgrm->keys[0]), - *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - } - - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - gettimeofday(&end_tv, NULL); - totaltime = end_tv.tv_sec - start_tv.tv_sec; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took %u seconds to insert and retrieve %d keys/values\n", totaltime, - keys_per_rank); - - return 0; -} diff --git a/meta/tests/single_tests/put-getn_secondary_local.c b/meta/tests/single_tests/put-getn_secondary_local.c deleted file mode 100644 index 5277c493c..000000000 --- a/meta/tests/single_tests/put-getn_secondary_local.c +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define SLICE_SIZE 10 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - 
uint32_t key, **secondary_keys; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - int i; - int keys_per_rank = 100; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_DBG; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - struct timeval start_tv, end_tv; - unsigned totaltime; - struct index_t *secondary_local_index; - struct secondary_info *secondary_info; - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - mdhim_options_set_max_recs_per_slice(db_opts, SLICE_SIZE); - mdhim_options_set_server_factor(db_opts, 4); - - gettimeofday(&start_tv, NULL); - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - - //Create a secondary index on only one range server - secondary_local_index = create_local_index(md, LEVELDB, - MDHIM_INT_KEY, NULL); - //Put the keys and values - for (i = 0; i < keys_per_rank; i++) { - key = keys_per_rank * md->mdhim_rank + i; - value = md->mdhim_rank + i; - //Create the secondary info struct - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + i + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - secondary_info = 
mdhimCreateSecondaryInfo(secondary_local_index, - (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_LOCAL_INFO); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), NULL, - secondary_info); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Rank: %d put secondary key: %d with value: %d\n", md->mdhim_rank, - *secondary_keys[0], key); - } - - mdhim_full_release_msg(brm); - mdhimReleaseSecondaryInfo(secondary_info); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - } - - - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the stats for the secondary index - ret = mdhimStatFlush(md, secondary_local_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the values using get_next - for (i = 0; i < keys_per_rank; i++) { - value = 0; - key = md->mdhim_rank + i; - bgrm = mdhimBGetOp(md, secondary_local_index, - &key, sizeof(uint32_t), 1, - MDHIM_GET_NEXT); - while (bgrm) { - if (!bgrm || bgrm->error) { - printf("Rank: %d, Error getting next key/value given key: %d from MDHIM\n", - md->mdhim_rank, key); - } else if (bgrm->keys[0] && bgrm->values[0]) { - printf("Rank: %d successfully got key: %d with value: %d from MDHIM\n", - md->mdhim_rank, - *((int *) bgrm->keys[0]), - *((int *) bgrm->values[0])); - } - - bgrmp = bgrm; - bgrm = bgrm->next; - mdhim_full_release_msg(bgrmp); - } - } - - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - gettimeofday(&end_tv, NULL); - totaltime = end_tv.tv_sec - 
start_tv.tv_sec; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took %u seconds to insert and retrieve %d keys/values\n", totaltime, - keys_per_rank); - - return 0; -} diff --git a/meta/tests/single_tests/put-getp.c b/meta/tests/single_tests/put-getp.c deleted file mode 100644 index 62ca4fcb5..000000000 --- a/meta/tests/single_tests/put-getp.c +++ /dev/null @@ -1,104 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - int key; - int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - int i; - int keys_per_rank = 5; - char *db_path = " "; - char *db_name = "mdhim"; - int dbug = MLOG_CRIT; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; // (data_store.h) - MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the keys and values - for (i = 0; i < keys_per_rank; i++) { - key = keys_per_rank * md->mdhim_rank + i; - value = md->mdhim_rank + i; - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Rank: %d put key: %d with value: %d\n", md->mdhim_rank, key, value); - 
} - } - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the values using get_prev - for (i = keys_per_rank; i > 0; i--) { - value = 0; - key = keys_per_rank * md->mdhim_rank + i; - bgrm = mdhimBGetOp(md, md->primary_index, - &key, sizeof(int), 1, MDHIM_GET_PREV); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %d from MDHIM\n", key); - } else if (bgrm->keys[0] && bgrm->values[0]) { - printf("Rank: %d successfully got key: %d with value: %d from MDHIM\n", - md->mdhim_rank, - *((int *) bgrm->keys[0]), - *((int *) bgrm->values[0])); - } - } - - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/put-getp_secondary.c b/meta/tests/single_tests/put-getp_secondary.c deleted file mode 100644 index d91b5ca89..000000000 --- a/meta/tests/single_tests/put-getp_secondary.c +++ /dev/null @@ -1,150 +0,0 @@ -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define SECONDARY_SLICE_SIZE 5 - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - uint32_t key, **secondary_keys; - int value, *secondary_key_lens; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - int i; - int keys_per_rank = 100; - char *db_path = "./"; - char *db_name = "mdhimTstDB-"; - int dbug = MLOG_CRIT; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - int db_type = LEVELDB; //(data_store.h) - struct timeval start_tv, end_tv; - unsigned totaltime; - struct index_t *secondary_index; - struct secondary_info *secondary_info; - 
MPI_Comm comm; - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, MDHIM_INT_KEY); - mdhim_options_set_debug_level(db_opts, dbug); - - gettimeofday(&start_tv, NULL); - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Create a secondary index - secondary_index = create_global_index(md, 2, SECONDARY_SLICE_SIZE, LEVELDB, - MDHIM_INT_KEY, NULL); - - //Put the keys and values - for (i = 0; i < keys_per_rank; i++) { - key = keys_per_rank * md->mdhim_rank + i; - value = md->mdhim_rank + i; - secondary_keys = malloc(sizeof(uint32_t *)); - secondary_keys[0] = malloc(sizeof(uint32_t)); - *secondary_keys[0] = md->mdhim_rank + i + 1; - secondary_key_lens = malloc(sizeof(int)); - secondary_key_lens[0] = sizeof(uint32_t); - secondary_info = mdhimCreateSecondaryInfo(secondary_index, (void **) secondary_keys, - secondary_key_lens, 1, - SECONDARY_GLOBAL_INFO); - brm = mdhimPut(md, &key, sizeof(key), - &value, sizeof(value), - secondary_info, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { -// printf("Rank: %d put key: %d with value: %d\n", md->mdhim_rank, key, value); - } - - mdhimReleaseSecondaryInfo(secondary_info); - mdhim_full_release_msg(brm); - free(secondary_keys[0]); - free(secondary_keys); - free(secondary_key_lens); - } - - MPI_Barrier(MPI_COMM_WORLD); - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - 
printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the stats for the primary index - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - //Get the stats for the secondary index - ret = mdhimStatFlush(md, secondary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats\n"); - } else { - printf("Got stats\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - - //Get the secondary keys and values using get_prev - for (i = keys_per_rank; i > 0; i--) { - value = 0; - key = md->mdhim_rank + i + 2; - bgrm = mdhimBGetOp(md, secondary_index, - &key, sizeof(int), 1, MDHIM_GET_PREV); - if (!bgrm || bgrm->error) { - printf("Rank: %d, Error getting prev key/value given key: %d from MDHIM\n", - md->mdhim_rank, key); - } else if (bgrm->keys[0] && bgrm->values[0]) { - printf("Rank: %d successfully got key: %d with value: %d from MDHIM\n", - md->mdhim_rank, - *((int *) bgrm->keys[0]), - *((int *) bgrm->values[0])); - } - - mdhim_full_release_msg(bgrm); - } - - ret = mdhimClose(md); - mdhim_options_destroy(db_opts); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - gettimeofday(&end_tv, NULL); - totaltime = end_tv.tv_sec - start_tv.tv_sec; - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - printf("Took %u seconds to insert and retrieve %d keys/values\n", totaltime, - keys_per_rank); - - return 0; -} diff --git a/meta/tests/single_tests/puts-gets.c b/meta/tests/single_tests/puts-gets.c deleted file mode 100644 index 3973bef0e..000000000 --- a/meta/tests/single_tests/puts-gets.c +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -int main(int argc, char **argv) { - int ret; - int provided = 0; - struct mdhim_t *md; - char *key; - int value; - struct mdhim_brm_t *brm; - struct mdhim_bgetrm_t *bgrm; - mdhim_options_t *db_opts; - int i; 
- MPI_Comm comm; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, "./"); - mdhim_options_set_db_name(db_opts, "mdhimTstDB"); - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_key_type(db_opts, MDHIM_STRING_KEY); - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) { - printf("Error initializing MDHIM\n"); - exit(1); - } - - //Put the keys and values - for (i = 0; i < 2; i++) { - key = malloc(100); - sprintf(key, "%c", (int) '0' + (md->mdhim_rank + 1) + i); - value = 500 * (md->mdhim_rank + 1) + i; - brm = mdhimPut(md, key, strlen(key) + 1, - &value, sizeof(value), - NULL, NULL); - if (!brm || brm->error) { - printf("Error inserting key/value into MDHIM\n"); - } else { - printf("Successfully inserted key/value into MDHIM\n"); - } - - //Commit the database - ret = mdhimCommit(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error committing MDHIM database\n"); - } else { - printf("Committed MDHIM database\n"); - } - - //Get the values - value = 0; - bgrm = mdhimGet(md, md->primary_index, - key, strlen(key) + 1, MDHIM_GET_EQ); - if (!bgrm || bgrm->error) { - printf("Error getting value for key: %s from MDHIM\n", key); - } else { - printf("Successfully got value: %d from MDHIM for key: %s\n", *((int *) bgrm->values[0]), key); - } - - free(key); - } - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) { - printf("Error closing MDHIM\n"); - } - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return 0; -} diff --git a/meta/tests/single_tests/range_bget.c b/meta/tests/single_tests/range_bget.c deleted file mode 100644 index 73dd8c836..000000000 --- 
a/meta/tests/single_tests/range_bget.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://gihub.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define MANIFEST_FILE_NAME "mdhim_manifest_" -#define GEN_STR_LEN 1024 - -struct timeval putstart, putend; -struct timeval calstart, calend; -struct timeval putreleasestart, putreleaseend; -double puttime; -double putbw; -double putops; -double caltime; -double putreleasetime = 0; - -struct timeval getstart, getend; - -double gettime; -double getbw; -double getops; - -typedef struct { - unsigned long fid; - unsigned long nodeid; - - unsigned long offset; - unsigned long addr; - unsigned long len; -}meta_t; - -typedef struct { - unsigned long fid; - unsigned long offset; -}ulfs_key_t; - -typedef struct { - unsigned long nodeid; - unsigned long len; - unsigned long addr; -}ulfs_val_t; - -int init_meta_lst(meta_t *meta_lst, ulfs_key_t **key_lst, ulfs_val_t **value_lst, \ - long segnum, long transz, int rank, int size); -void init_get_key_lst(ulfs_key_t **get_key_lst,\ - int *get_key_lens, int *get_val_lens,\ - int key_cnt, long gettransz, int rank, int size); - -int main(int argc, char **argv) { - int c, serratio, bulknum, ret, provided, size, path_len; - - long transz, segnum, rangesz, gettransz; - - char db_path[GEN_STR_LEN] = {0}; - char db_name[GEN_STR_LEN] = {0}; - - MPI_Comm comm; - - struct mdhim_t *md; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - - static const char * opts = "c:t:s:r:n:p:d:g:"; - - while((c = getopt(argc, argv, opts)) != -1){ - switch (c) { - - case 'c': /*number of batched key-value pairs in each bput*/ - bulknum = atoi(optarg); break; - case 's': /*server factor same as MDHIM*/ - serratio = atoi(optarg); break; - case 't': /*transfer size*/ - transz = atol(optarg); break; - case 'g': - gettransz = atol(optarg); break; - case 'r': /*the key range for each slice*/ - rangesz = atol(optarg); break; - case 'n': /*number of 
transfers*/ - segnum = atol(optarg); break; - case 'p': /*path of the database*/ - strcpy(db_path, optarg); break; - case 'd': /*name of the database*/ - strcpy(db_name, optarg); break; - } - } - - printf("gettransz:%ld,transz:%ld,num:%d\n", gettransz, transz, segnum); - fflush(stdout); - - db_opts = malloc(sizeof(struct mdhim_options_t)); - - db_opts->db_path = db_path; - db_opts->db_name = "ulfsDB"; - db_opts->manifest_path = NULL; - db_opts->db_type = LEVELDB; - db_opts->db_create_new = 1; - db_opts->db_value_append = MDHIM_DB_OVERWRITE; - - db_opts->rserver_factor = serratio; - db_opts->db_paths = NULL; - db_opts->num_paths = 0; - db_opts->num_wthreads = 1; - - path_len = strlen(db_opts->db_path) + strlen(MANIFEST_FILE_NAME) + 1; - - char *manifest_path; - manifest_path = malloc(path_len); - sprintf(manifest_path, "%s/%s", db_opts->db_path, MANIFEST_FILE_NAME); - db_opts->manifest_path = manifest_path; - db_opts->db_name = db_name; - db_opts->db_type = LEVELDB; - - db_opts->db_key_type = MDHIM_UNIFYFS_KEY; - db_opts->debug_level = MLOG_CRIT; - db_opts->max_recs_per_slice = rangesz; - db_opts->rserver_factor = serratio; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - meta_t *meta_lst = (meta_t *)malloc(segnum*sizeof(meta_t)); - ulfs_key_t **key_lst = (ulfs_key_t **)malloc(segnum*sizeof(ulfs_key_t *)); - ulfs_val_t **val_lst = (ulfs_val_t **)malloc(segnum * sizeof(ulfs_val_t *)); - - int *key_lens = (int *)malloc(segnum * sizeof(int)); - int *val_lens = (int *)malloc(segnum * sizeof(int)); - - long i; - for (i = 0; i < segnum; i++) { - key_lens[i] = 
sizeof(ulfs_key_t); - val_lens[i] = sizeof(ulfs_val_t); - } - - int glb_size, glb_rank; - MPI_Comm_size(MPI_COMM_WORLD, &glb_size); - MPI_Comm_rank(MPI_COMM_WORLD, &glb_rank); - init_meta_lst(meta_lst, key_lst, val_lst, segnum,\ - transz, glb_rank, glb_size); - - MPI_Comm_size(md->mdhim_comm, &size); - MPI_Barrier(MPI_COMM_WORLD); - - gettimeofday(&putstart, NULL); - long total_keys = 0, round = 0; - - //Insert the keys into MDHIM - brm = mdhimBPut(md, (void **)(&key_lst[total_keys]), key_lens, - (void **) (&val_lst[total_keys]), val_lens, segnum, - NULL, NULL); - //Iterate through the return messages to see if there is an error and to free it - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - - gettimeofday(&putreleasestart, NULL); - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brm = brmp; - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - - } - gettimeofday(&putreleaseend, NULL); - putreleasetime = 1000000*(putreleaseend.tv_sec-putreleasestart.tv_sec)\ - + putreleaseend.tv_usec - putreleasestart.tv_usec; - - round++; - - MPI_Barrier(MPI_COMM_WORLD); - gettimeofday(&putend, NULL); - puttime = 1000000*(putend.tv_sec - putstart.tv_sec) + putend.tv_usec -putstart.tv_usec; - puttime/=1000000; - putbw = (sizeof(ulfs_key_t)+sizeof(ulfs_val_t))*segnum*size/puttime; - putops = segnum/puttime; - gettimeofday(&calstart, NULL); - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats from MDHIM database\n"); - } else { - } - gettimeofday(&calend, NULL); - caltime = 1000000*(calend.tv_sec-calstart.tv_sec)+calend.tv_usec-calstart.tv_usec; - caltime = caltime/1000000; - - total_keys = 0; - - ulfs_key_t **get_key_lst = (ulfs_key_t **)malloc(2 * segnum\ - * transz / gettransz * sizeof(ulfs_key_t *)); - int *get_key_lens =\ - (long *)malloc(2 * 
segnum * transz / gettransz * sizeof(int)); - int *get_val_lens = \ - (long *)malloc(2 * segnum * transz / gettransz * sizeof(int)); - - init_get_key_lst(get_key_lst, get_key_lens, get_val_lens,\ - segnum * transz / gettransz,\ - gettransz, glb_rank, glb_size); - - gettimeofday(&getstart, NULL); - //Get the keys and values back starting from and including key[0] - - total_keys = 0; - - bgrm = mdhimBGet(md, md->primary_index,\ - &get_key_lst[total_keys], get_key_lens, - segnum * transz / gettransz* 2, MDHIM_RANGE_BGET); - bgrmp = bgrm; - while (bgrmp) { - if (bgrmp->error < 0) { - printf("Rank: %d - Error retrieving values", md->mdhim_rank); - } - for (i = 0; i < bgrmp->num_keys; i++) { - /* printf("Rank: %d - Got key: %ld, num_keys is %ld\n", md->mdhim_rank, - ((ulfs_key_t *)bgrmp->keys[i])->offset, bgrmp->num_keys ); - fflush(stdout); - */ - } - - bgrmp = bgrmp->next; - //Free the message received - mdhim_full_release_msg(bgrm); - bgrm = bgrmp; - } - - MPI_Barrier(MPI_COMM_WORLD); - gettimeofday(&getend, NULL); - gettime = 1000000*(getend.tv_sec - getstart.tv_sec)+getend.tv_usec-getstart.tv_usec; - gettime/=1000000; - getops = segnum/gettime; - - if (md->mdhim_rank == size - 1) { - printf("puttime is %lf, putops is %lf, rank is %d\n",\ - puttime, putops, md->mdhim_rank); fflush(stdout); - printf("gettime is %lf, getops is %lf, rank is %d\n",\ - gettime, getops, md->mdhim_rank); fflush(stdout); - } - - free(meta_lst); - - free(key_lens); - for (i=0; ifid = meta_lst[i].fid; - key_lst[i]->offset = meta_lst[i].offset; - - - value_lst[i] = (ulfs_val_t *)malloc(sizeof(ulfs_val_t)); - value_lst[i]->addr = meta_lst[i].addr; - value_lst[i]->len = meta_lst[i].len; - value_lst[i]->nodeid = meta_lst[i].nodeid; - - } - return 0; -} - -void init_get_key_lst(ulfs_key_t **get_key_lst,\ - int *get_key_lens, int *get_val_lens,\ - int key_cnt, long gettransz, int rank, int glb_size) { - long i; - for (i = 0; i < key_cnt; i++) { - get_key_lst[2 * i] = (ulfs_key_t 
*)malloc(sizeof(ulfs_key_t)); - get_key_lst[2 * i]->fid = 0; -// get_key_lst[2 * i]->fid = rank; -// get_key_lst[2 * i]->offset = (long)i * gettransz + rank * gettransz * key_cnt; -// get_key_lst[2 * i]->offset = (long)i * gettransz; - get_key_lst[2 * i]->offset = (long)i * gettransz * glb_size + rank * gettransz; - get_key_lens[2 * i] = sizeof(ulfs_key_t); - get_val_lens[2 * i] = sizeof(ulfs_val_t); - - get_key_lst[2 * i + 1] = (ulfs_key_t *)malloc(sizeof(ulfs_key_t)); - get_key_lst[2 * i + 1]->fid = 0; - get_key_lst[2 * i + 1]->offset = get_key_lst[2 * i]->offset + gettransz - 1; - get_key_lens[2 * i + 1] = sizeof(ulfs_key_t); - get_val_lens[2 * i + 1] = sizeof(ulfs_val_t); - - - } -} - - diff --git a/meta/tests/single_tests/range_bget.sh b/meta/tests/single_tests/range_bget.sh deleted file mode 100644 index 81ceeead6..000000000 --- a/meta/tests/single_tests/range_bget.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -set -x - -ulimit -c unlimited -export MPICH_MAX_THREAD_SAFETY=multiple - -nnodes=1 -nprocs=1 -# determine number of nodes in our allocation -nodes=$SLURM_NNODES -SEGNUM=1024 -BULKNUM=$SEGNUM -srun --clear-ssd -n${nprocs} -N${nnodes} ./range_bget -c ${BULKNUM} -s 16 -t 1048576 -g 1048576 -r 1048576 -n ${SEGNUM} -p /l/ssd/ -d db 2>&1|tee range_${SLURM_NNODES}.log - diff --git a/meta/tests/single_tests/range_test.c b/meta/tests/single_tests/range_test.c deleted file mode 100644 index bea723082..000000000 --- a/meta/tests/single_tests/range_test.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2017, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
- * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -/* - * - * Copyright (c) 2014, Los Alamos National Laboratory - * All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include "mpi.h" -#include "mdhim.h" - -#define MANIFEST_FILE_NAME "/mdhim_manifest_" -#define GEN_STR_LEN 1024 - -struct timeval putstart, putend; -struct timeval calstart, calend; -struct timeval putreleasestart, putreleaseend; -double puttime; -double putbw; -double putops; -double caltime; -double putreleasetime = 0; - -struct timeval getstart, getend; - -double gettime; -double getbw; -double getops; - -typedef struct { - unsigned long fid; - unsigned long nodeid; - - unsigned long offset; - unsigned long addr; - unsigned long len; -}meta_t; - -typedef struct { - unsigned long fid; - unsigned long offset; -}ulfs_key_t; - -typedef struct { - unsigned long nodeid; - unsigned long len; - unsigned long addr; -}ulfs_val_t; - -int init_meta_lst(meta_t *meta_lst, ulfs_key_t **key_lst, ulfs_val_t **value_lst, \ - long segnum, long transz, int rank); - -int main(int argc, char **argv) { - int c, serratio, bulknum, ret, provided, size, path_len; - - long transz, segnum, rangesz; - - char db_path[GEN_STR_LEN] = {0}; - char db_name[GEN_STR_LEN] = {0}; - - MPI_Comm comm; - - struct mdhim_t *md; - struct mdhim_brm_t *brm, *brmp; - struct mdhim_bgetrm_t *bgrm, *bgrmp; - mdhim_options_t *db_opts; // Local variable for db create options to be passed - - static const char * opts = 
"c:t:s:r:n:p:d:"; - - while((c = getopt(argc, argv, opts)) != -1){ - switch (c) { - - case 'c': /*number of batched key-value pairs in each bput*/ - bulknum = atoi(optarg); break; - case 's': /*server factor same as MDHIM*/ - serratio = atoi(optarg); break; - case 't': /*transfer size*/ - transz = atol(optarg); break; - case 'r': /*the key range for each slice*/ - rangesz = atol(optarg); break; - case 'n': /*number of transfers*/ - segnum = atol(optarg); break; - case 'p': /*path of the database*/ - strcpy(db_path, optarg); break; - case 'd': /*name of the database*/ - strcpy(db_name, optarg); break; - } - } - - db_opts = malloc(sizeof(struct mdhim_options_t)); - - db_opts->db_path = db_path; - db_opts->db_name = "ulfsDB"; - db_opts->manifest_path = NULL; - db_opts->db_type = LEVELDB; - db_opts->db_create_new = 1; - db_opts->db_value_append = MDHIM_DB_OVERWRITE; - - db_opts->rserver_factor = serratio; - db_opts->db_paths = NULL; - db_opts->num_paths = 0; - db_opts->num_wthreads = 1; - - path_len = strlen(db_opts->db_path) + strlen(MANIFEST_FILE_NAME) + 1; - - char *manifest_path; - manifest_path = malloc(path_len); - sprintf(manifest_path, "%s/%s", db_opts->db_path, MANIFEST_FILE_NAME); - db_opts->manifest_path = manifest_path; - db_opts->db_name = db_name; - db_opts->db_type = LEVELDB; - - db_opts->db_key_type = MDHIM_UNIFYFS_KEY; - db_opts->debug_level = MLOG_CRIT; - db_opts->max_recs_per_slice = rangesz; - db_opts->rserver_factor = serratio; - - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - - if (!md) { - printf("Error initializing MDHIM\n"); - MPI_Abort(MPI_COMM_WORLD, ret); - exit(1); - } - - meta_t *meta_lst = (meta_t *)malloc(segnum*sizeof(meta_t)); - ulfs_key_t 
**key_lst = (ulfs_key_t **)malloc(segnum*sizeof(ulfs_key_t *)); - ulfs_val_t **val_lst = (ulfs_val_t **)malloc(segnum * sizeof(ulfs_val_t *)); - - int *key_lens = (int *)malloc(segnum * sizeof(int)); - int *val_lens = (int *)malloc(segnum * sizeof(int)); - - long i; - for (i = 0; i < segnum; i++) { - key_lens[i] = sizeof(ulfs_key_t); - val_lens[i] = sizeof(ulfs_val_t); - } - init_meta_lst(meta_lst, key_lst, val_lst, segnum, transz, md->mdhim_rank); - - MPI_Comm_size(md->mdhim_comm, &size); - MPI_Barrier(MPI_COMM_WORLD); - - gettimeofday(&putstart, NULL); - long total_keys = 0, round = 0; - while (total_keys < segnum) { - //Insert the keys into MDHIM - brm = mdhimBPut(md, (void **)(&key_lst[total_keys]), key_lens, - (void **) (&val_lst[total_keys]), val_lens, bulknum, - NULL, NULL); - //Iterate through the return messages to see if there is an error and to free it - brmp = brm; - if (!brmp || brmp->error) { - printf("Rank - %d: Error inserting keys/values into MDHIM\n", md->mdhim_rank); - } - - gettimeofday(&putreleasestart, NULL); - while (brmp) { - if (brmp->error < 0) { - printf("Rank: %d - Error inserting key/values info MDHIM\n", md->mdhim_rank); - } - - brm = brmp; - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - - } - gettimeofday(&putreleaseend, NULL); - putreleasetime = 1000000*(putreleaseend.tv_sec-putreleasestart.tv_sec)+putreleaseend.tv_usec-putreleasestart.tv_usec; - - total_keys += bulknum; - round++; - } - MPI_Barrier(MPI_COMM_WORLD); - gettimeofday(&putend, NULL); - puttime = 1000000*(putend.tv_sec - putstart.tv_sec) + putend.tv_usec -putstart.tv_usec; - puttime/=1000000; - putbw = (sizeof(ulfs_key_t)+sizeof(ulfs_val_t))*segnum*size/puttime; - putops = segnum/puttime; - gettimeofday(&calstart, NULL); - ret = mdhimStatFlush(md, md->primary_index); - if (ret != MDHIM_SUCCESS) { - printf("Error getting stats from MDHIM database\n"); - } else { - } - gettimeofday(&calend, NULL); - caltime = 
1000000*(calend.tv_sec-calstart.tv_sec)+calend.tv_usec-calstart.tv_usec; - caltime = caltime/1000000; - total_keys = 0; - - gettimeofday(&getstart, NULL); - //Get the keys and values back starting from and including key[0] - - total_keys = 0; - while (total_keys < segnum) { - bgrm = mdhimBGetRange(md, md->primary_index, - key_lst[total_keys], key_lst[total_keys + bulknum - 1],\ - sizeof(ulfs_key_t)); - bgrmp = bgrm; - while (bgrmp) { - if (bgrmp->error < 0) { - printf("Rank: %d - Error retrieving values", md->mdhim_rank); - } - for (i = 0; i < bgrmp->num_keys; i++) { -/* printf("Rank: %d - Got key: %ld, num_keys is %ld\n", md->mdhim_rank, - ((ulfs_key_t *)bgrmp->keys[i])->offset, bgrmp->num_keys ); - fflush(stdout); -*/ - } - - bgrmp = bgrmp->next; - //Free the message received - mdhim_full_release_msg(bgrm); - bgrm = bgrmp; - } - total_keys += bulknum; - } - MPI_Barrier(MPI_COMM_WORLD); - gettimeofday(&getend, NULL); - gettime = 1000000*(getend.tv_sec - getstart.tv_sec)+getend.tv_usec-getstart.tv_usec; - gettime/=1000000; - getops = segnum/gettime; - - if (md->mdhim_rank == size - 1) { - printf("puttime is %lf, putops is %lf, rank is %d\n",\ - puttime, putops, md->mdhim_rank); fflush(stdout); - printf("gettime is %lf, getops is %lf, rank is %d\n",\ - gettime, getops, md->mdhim_rank); fflush(stdout); - } - - free(meta_lst); - - free(key_lens); - for (i=0; ifid = meta_lst[i].fid; - key_lst[i]->offset = meta_lst[i].offset; - - value_lst[i] = (ulfs_val_t *)malloc(sizeof(ulfs_val_t)); - value_lst[i]->addr = meta_lst[i].addr; - value_lst[i]->len = meta_lst[i].len; - value_lst[i]->nodeid = meta_lst[i].nodeid; - } - return 0; -} - - - diff --git a/meta/tests/single_tests/range_test.sh b/meta/tests/single_tests/range_test.sh deleted file mode 100644 index df592c30d..000000000 --- a/meta/tests/single_tests/range_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -ulimit -c unlimited -export MPICH_MAX_THREAD_SAFETY=multiple - -nodes=$SLURM_NNODES -BATCH_CNT=1024 - -for 
BULK_NUM in ${BATCH_CNT}; do - srun --clear-ssd -n${SLURM_NNODES} -N${SLURM_NNODES} ./range_test -c ${BATCH_CNT} -s 1 -t 16384 -r 1048576 -n 131072 -p /l/ssd/ -d db -done - diff --git a/meta/tests/tester/inTestAppend.txt b/meta/tests/tester/inTestAppend.txt deleted file mode 100644 index 74aff27db..000000000 --- a/meta/tests/tester/inTestAppend.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Put first copy of record -put 08642 abcdefg - -# Get original copy -get 08642 - -# Append by using the -a flag in the tester program -put 08642 ABCEDFG - -# Returned value should be concatenated -get 08642 diff --git a/meta/tests/tester/inTestBasic.txt b/meta/tests/tester/inTestBasic.txt deleted file mode 100644 index 3eb794c60..000000000 --- a/meta/tests/tester/inTestBasic.txt +++ /dev/null @@ -1,21 +0,0 @@ -# Key is 6_digits * rank, value is 4*30=120 chars -# for string/byte 6_digit && 0rank -put 123456 abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456 -get 123456 - -flush - -# Key is 6_digits * rank + (1 to n), value is 40 chars -# for string/byte 6_digit && 0rank && 0i -bput 999 987654 1234ABCDEFGHIJKLMNOPQRSTUVWXYZ_+=$%*!@~ZYXWVUTSRQPONMLKJIHGFEDCBA3456 -bget 999 987654 -del 123456 - -flush - -# key is 5_digits * rank + (1 to n), value is 241 -# for string/byte 5_digit && 0rank && 0i -bput 999 56789 YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456i - -bdel 999 56789 - diff --git a/meta/tests/tester/inTestBulk.txt b/meta/tests/tester/inTestBulk.txt deleted file mode 100644 index 04be33115..000000000 --- a/meta/tests/tester/inTestBulk.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Put a good size numberor key/values -bput 99999 56789 
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ - -# Get all the above key/values -bget 99999 56789 - -# Required to do next ops -flush - -# Get a small set using the bulk get next operation -bgetop 500 56789 1 diff --git a/meta/tests/tester/inTestBulkLarge.txt b/meta/tests/tester/inTestBulkLarge.txt deleted file mode 100644 index 2869329f0..000000000 --- a/meta/tests/tester/inTestBulkLarge.txt +++ /dev/null @@ -1,4 +0,0 @@ -#23456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 12345 - -# data value of 2040 -bput 99999 dataValueOf2040 abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA34
56YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ11111222223333344444555556666677777 diff --git a/meta/tests/tester/inTestBulkLargeErr.txt b/meta/tests/tester/inTestBulkLargeErr.txt deleted file mode 100644 index 0ab8fe8e1..000000000 --- a/meta/tests/tester/inTestBulkLargeErr.txt +++ /dev/null @@ -1,9 +0,0 @@ -#23456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 12345 - -# data value of 2040 -bput 999999 dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040dataValueOf2040 
abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYY
ZZZZZ11111222223333344444555556666677777 - -#Error: bulk put message too large. Bput is over Maximum size allowed of 2147483647. -#Error: Packing message failed before sending. -# -# at src/messages.h MAX_BULK_OPS set to 1000000 diff --git a/meta/tests/tester/inTestErr1.txt b/meta/tests/tester/inTestErr1.txt deleted file mode 100644 index 2ef49ad83..000000000 --- a/meta/tests/tester/inTestErr1.txt +++ /dev/null @@ -1,24 +0,0 @@ -# Basic test deck should be run first and DB should not be removed. - -# Simple bget should work -bget 10 987654 - -# Get using an op othern than EQ should fail if no flush has been done -get 123456 1 - -# This should enable the command below to work -flush -get 123456 1 - -# Should fail -get 13579 0 - -# Should work as we are not using op=EQ (gets the next value in the DB after -# the seed) -get 13579 1 - -# Should fail -bdel 10 24680 - -# Should fail as incorrec operator -get 123456 9 diff --git a/meta/tests/tester/inTestLarge.txt b/meta/tests/tester/inTestLarge.txt deleted file mode 100644 index 2bdd1d0cb..000000000 --- a/meta/tests/tester/inTestLarge.txt +++ /dev/null @@ -1,21 +0,0 @@ -#23456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 12345 -# Key for string/byte 505_digit && 0rank -put abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz12340abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333 This_is_the_value_for_a_long_key - -get 
abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz12340abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333 - -# data element of size 1015 -put dataElementOfSize1015 abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ11111222223333344444 - -get dataElementOfSize1015 - -# data value of 2040 -put dataValueOf2040 
abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz1234_abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYY
ZZZZZ11111222223333344444555556666677777 - -get dataValueOf2040 - -# data element of size 505 -put dataElementOfSize505 abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456YXWVUTSRQPONMLKJIHGFEDCBA3456ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012abcdefghijklmnopqrstuvwxyz12340abcdefghijklmnopqrstuvwxyz1234ABCDEFGHIJKLMNOPQRSTUVWXYZ5678zyxwvutsrqponmlkjihgfedcba9012ZYXWVUTSRQPONMLKJIHGFEDCBA3456AAAAABBBBBCCCCCDDDDDEEEEEFFFFFGGGGGHHHHHIIIIIJJJJJKKKKKLLLLLMMMMMNNNNNOOOOOPPPPPQQQQQRRRRRSSSSSTTTTTUUUUUVVVVVWWWWWXXXXXYYYYYZZZZZ111112222233333 - -get dataElementOfSize505 - diff --git a/meta/tests/tester/inTestNext.txt b/meta/tests/tester/inTestNext.txt deleted file mode 100644 index dac7cb296..000000000 --- a/meta/tests/tester/inTestNext.txt +++ /dev/null @@ -1,9 +0,0 @@ -# Insert small set, later used to text get OPS -bput 6 nptest.txt -flush -# Test differ get ops: EQ (0), NEXT(1), PREV(2), FIRST(3) and LAST (4) -get gt1.txt -get gt2.txt -get gt3.txt -get gt4.txt -get gt5.txt diff --git a/meta/tests/tester/inTestOps.txt b/meta/tests/tester/inTestOps.txt deleted file mode 100644 index e560d2fb6..000000000 --- a/meta/tests/tester/inTestOps.txt +++ /dev/null @@ -1,122 +0,0 @@ -# Large number of operations testing different operations. Maximum of nodes should be four as rank is multiplied -# by key_seed and begins to generate too large values for keys. - -# Put several sets of sequences of 99999 values -bput 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -# Verify we got the correct values we put in. 
For FLOAT type keys, because of magnitude of key_seed do not generate unique keys -# therefore verify is turned off on inTestOps3.txt -bget 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -# Test operations of bulkgetop (1) NEXT and (2) PREV -# This test is turned off for BYTE keys as the NEXT/PREV operations do not generate consistent NEXT/PREV values. -# FLOAT values also have a problem generating unique keys therefore a smaller number for this request is tested. -bgetop 9999 25468001 1 -bgetop 9999 25478001 2 - -# Test getting the FIRST value with different keys, should get the same key/value -get 567891 3 -get 199135790 3 -get 25468001 3 - -# Put more sequences -bput 99999 299135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 25246802 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 256782 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush -# Get LAST value, should get the same key/value -get 25468001 4 -get 199135790 4 -get 567891 4 - -# Put more sequences -bput 99999 399135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 35246803 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 356783 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 499135790 
AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 45246804 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 456784 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -# Delete one sequence -bdel 99999 299135790 -bdel 99999 25246802 -bdel 99999 256782 - -# Put more -bput 99999 599135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55246805 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 556785 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 991357900 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55468000 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 167890 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -# Using the tester's ngetn functions issue 999999 consecutive get NEXT operations starting with key 0 -# This is also turned off for BYTE keys as NEXT operation does not generate consistent values. -ngetn 999999 0 0 - -# Get FLAST (4) and FIRST (3) key/value -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -# Delete an intermediate sequence -bdel 99999 499135790 -bdel 99999 45246804 -bdel 99999 456784 - -# Get and verify values from first set of sequences. DO NOT verify last sequence as it overlaps and does not verify -# This is not true for strings as they generate unique keys. 
-bget 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 # ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -# Delete several sets -bdel 99999 399135790 -bdel 99999 35246803 -bdel 99999 356783 - -bdel 99999 599135790 -bdel 99999 55246805 -bdel 99999 556785 - -bdel 99999 991357900 -bdel 99999 55468000 -bdel 99999 167890 - -flush -# Get FLAST (4) and FIRST (3) key/value -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -# Delete that last set of sequences so there should be nothing left in DB -bdel 99999 199135790 -bdel 99999 25468001 -bdel 99999 567891 - -flush - -# These should generate ERROR as there is nothing left in DB -get 11468001 4 -get 119135790 4 -get 117891 4 -get 117891 3 -get 119135790 3 -get 11468001 3 diff --git a/meta/tests/tester/inTestOps3.txt b/meta/tests/tester/inTestOps3.txt deleted file mode 100644 index 4fd52caa2..000000000 --- a/meta/tests/tester/inTestOps3.txt +++ /dev/null @@ -1,93 +0,0 @@ -bput 9999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bget 9999 199135790 # AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 9999 25468001 # abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 9999 567891 # 
ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -bgetop 999 25468001 1 -bgetop 999 25469001 2 - -get 567891 3 -get 199135790 3 -get 25468001 3 - -bput 9999 299135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 25246802 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 256782 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 - -bput 9999 399135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 35246803 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 356783 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 9999 499135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 45246804 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 456784 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 9999 299135790 -bdel 9999 25246802 -bdel 9999 256782 - -bput 9999 599135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 55246805 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 556785 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 9999 991357900 
AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 9999 55468000 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 9999 167890 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 9999 499135790 -bdel 9999 45246804 -bdel 9999 456784 - -bget 9999 199135790 # AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 9999 25468001 # abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 9999 567891 # ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 9999 399135790 -bdel 9999 35246803 -bdel 9999 356783 - -bdel 9999 599135790 -bdel 9999 55246805 -bdel 9999 556785 - -bdel 9999 991357900 -bdel 9999 55468000 -bdel 9999 167890 - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 9999 199135790 -bdel 9999 25468001 -bdel 9999 567891 - -flush -get 11468001 4 -get 119135790 4 -get 117891 4 -get 117891 3 -get 119135790 3 -get 11468001 3 diff --git a/meta/tests/tester/inTestOps5.txt b/meta/tests/tester/inTestOps5.txt deleted file mode 100644 index 976a63175..000000000 --- a/meta/tests/tester/inTestOps5.txt +++ /dev/null @@ -1,96 +0,0 @@ -bput 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bget 99999 199135790 
AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -bgetop 99999 25468001 1 -bgetop 99999 25468001 2 - -get 567891 3 -get 199135790 3 -get 25468001 3 - -bput 99999 299135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 25246802 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 256782 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 - -bput 99999 399135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 35246803 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 356783 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 499135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 45246804 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 456784 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 99999 299135790 -bdel 99999 25246802 -bdel 99999 256782 - -bput 99999 599135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55246805 
abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 556785 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 991357900 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55468000 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 167890 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -ngetn 999999 0 0 - -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 99999 499135790 -bdel 99999 45246804 -bdel 99999 456784 - -bget 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 99999 399135790 -bdel 99999 35246803 -bdel 99999 356783 - -bdel 99999 599135790 -bdel 99999 55246805 -bdel 99999 556785 - -bdel 99999 991357900 -bdel 99999 55468000 -bdel 99999 167890 - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 99999 199135790 -bdel 99999 25468001 -bdel 99999 567891 - -flush -get 11468001 4 -get 119135790 4 -get 117891 4 -get 117891 3 -get 119135790 3 -get 11468001 3 diff --git a/meta/tests/tester/inTestOps6.txt b/meta/tests/tester/inTestOps6.txt deleted file mode 100644 index b3bfec594..000000000 --- a/meta/tests/tester/inTestOps6.txt +++ /dev/null @@ -1,96 +0,0 @@ -bput 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz 
-bput 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bget 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -#bgetop 99999 25468001 1 -#bgetop 99999 25468001 2 - -get 567891 3 -get 199135790 3 -get 25468001 3 - -bput 99999 299135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 25246802 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 256782 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 - -bput 99999 399135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 35246803 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 356783 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 499135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 45246804 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 456784 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 99999 299135790 
-bdel 99999 25246802 -bdel 99999 256782 - -bput 99999 599135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55246805 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 556785 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bput 99999 991357900 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bput 99999 55468000 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bput 99999 167890 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -flush - -#ngetn 999999 0 0 - -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 99999 499135790 -bdel 99999 45246804 -bdel 99999 456784 - -bget 99999 199135790 AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz_AbCdEfGhIjKlMnOpQrStUvWxYz -bget 99999 25468001 abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz_abcdefghijklmnopqrstuvwxyz -bget 99999 567891 ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ_ABCDEFGHIJKLMNOPQRSTUVWXYZ - -bdel 99999 399135790 -bdel 99999 35246803 -bdel 99999 356783 - -bdel 99999 599135790 -bdel 99999 55246805 -bdel 99999 556785 - -bdel 99999 991357900 -bdel 99999 55468000 -bdel 99999 167890 - -flush -get 25468001 4 -get 199135790 4 -get 567891 4 -get 567891 3 -get 199135790 3 -get 25468001 3 - -bdel 99999 199135790 -bdel 99999 25468001 -bdel 99999 567891 - -flush -get 11468001 4 -get 119135790 4 -get 117891 4 -get 117891 3 -get 119135790 3 -get 11468001 3 diff --git a/meta/tests/tester/inTestRand.txt b/meta/tests/tester/inTestRand.txt deleted file mode 100644 index 5364ad47e..000000000 --- 
a/meta/tests/tester/inTestRand.txt +++ /dev/null @@ -1,11 +0,0 @@ -# Put n=40000 set of random records, key_size (for string/byte) of 128 chars -# value_size of 2040 chars. Last param makes sizes to be exact. If a 0 -# is used instead then sizes are 1 char up to x_size. -nput 40000 500 2040 1 - -flush - -# get a sequence of n record starting starting with a random value or zero if -# the second parameter is set to zero. The last parameter works in a similar -# manner as stated above. -ngetn 5000 500 1 diff --git a/meta/tests/tester/mdhimtst b/meta/tests/tester/mdhimtst deleted file mode 100644 index c8f8cd9c7..000000000 Binary files a/meta/tests/tester/mdhimtst and /dev/null differ diff --git a/meta/tests/tester/mdhimtst.c b/meta/tests/tester/mdhimtst.c deleted file mode 100644 index 0c6ccabe7..000000000 --- a/meta/tests/tester/mdhimtst.c +++ /dev/null @@ -1,2257 +0,0 @@ -/* - mdhimiftst.c - file based test frame - - * based on the pbliftst.c - Copyright (C) 2002 - 2007 Peter Graf - - pbliftst.c file is part of PBL - The Program Base Library. - PBL is free software. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - For more information on the Program Base Library or Peter Graf, - please see: http://www.mission-base.com/. 
- - - ------------------------------------------------------------------------------ -*/ - -/* - * make sure "strings | grep Id | sort -u" shows the source file versions - */ -char * mdhimTst_c_id = "$Id: mdhimTst.c,v 1.00 2013/07/08 20:56:50 JHR Exp $"; - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mpi.h" -#include "mdhim.h" -#include "mdhim_options.h" - -// From partitioner.h: -/* - #define MDHIM_INT_KEY 1 - //64 bit signed integer - #define MDHIM_LONG_INT_KEY 2 - #define MDHIM_FLOAT_KEY 3 - #define MDHIM_DOUBLE_KEY 4 - #define MDHIM_LONG_DOUBLE_KEY 5 - #define MDHIM_STRING_KEY 6 - //An arbitrary sized key - #define MDHIM_BYTE_KEY 7 */ - -#define TEST_BUFLEN 4096 -int BYTE_BUFLEN = 4; -int VAL_BUFLEN = 4; - -static FILE * logfile; -static FILE * infile; -int verbose = 1; // By default generate lots of feedback status lines -int dbOptionAppend = MDHIM_DB_OVERWRITE; -int to_log = 0; -// MDHIM_INT_KEY=1, MDHIM_LONG_INT_KEY=2, MDHIM_FLOAT_KEY=3, MDHIM_DOUBLE_KEY=4 -// MDHIM_LONG_DOUBLE_KEY=5, MDHIM_STRING_KEY=6, MDHIM_BYTE_KEY=7 -int key_type = 1; // Default "int" - -#define MAX_ERR_REPORT 50 -static char *errMsgs[MAX_ERR_REPORT]; -static int errMsgIdx = 0; - -static int sc_len; // Source Character string length for Random String generation -static char *sourceChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ124567890"; - -// Labels for basic get operations -static char *getOpLabel[] = { "MDHIM_GET_EQ", "MDHIM_GET_NEXT", "MDHIM_GET_PREV", - "MDHIM_GET_FIRST", "MDHIM_GET_LAST"}; - -#ifdef _WIN32 -#include -#endif - -#include - -// Add to error message list -static void addErrorMessage(char *msg) { -// Max number of error messages reached ignore the rest, but still increment count - if (errMsgIdx > MAX_ERR_REPORT) - errMsgIdx++; - else - errMsgs[errMsgIdx++] = msg; -} - -static void tst_say(int err, char * format, ...) 
{ - va_list ap; - - if (err) { - char *errMsg = (char *)malloc(sizeof(char)* TEST_BUFLEN); - va_start( ap, format ); - vsnprintf(errMsg, TEST_BUFLEN, format, ap); - addErrorMessage(errMsg); - - // Make sure error messages print to stderr when loging output - if (to_log) fprintf(stderr, "%s", errMsg); - va_end(ap); - } - - /* - * use fprintf to give out the text - */ - if (to_log) - { - va_start( ap, format ); - vfprintf( logfile, format, ap); - va_end(ap); - } - else - { - va_start( ap, format ); - vfprintf( stdout, format, ap); - va_end(ap); - } - -} - -int check_rank_range(int rank, int init, int final) { - - if (rank >= init && rank <= final) { //printf("\nRank %d is in range between %d and %d\n", rank, init, final); - return 0;} - else {//printf("\nRank %d is NOT in range between %d and %d\n", rank, init, final); - return 1; - } -} - -int check_rank_mod(int rank, int rmod){ - - if ((rank % rmod) ==0){ //printf("\nRank %d is divisible by %d\n", rank, rmod); - return 0;} - else{ //printf("\nRank %d is NOT divisible by %d\n", rank, rmod); - return 1; } -} - -static void putChar( int c ) -{ - static int last = 0; - - if( last == '\n' && c == '\n' ) - { - return; - } - - last = c; - putc( last, logfile ); -} - -static int getChar( void ) -{ - int c; - c = getc( infile ); - - /* - * a '#' starts a comment for the rest of the line - */ - if( c == '#') - { - /* - * comments starting with ## are duplicated to the output - */ - c = getc( infile ); - if( c == '#' ) - { - putChar( '#' ); - putChar( '#' ); - - while( c != '\n' && c != EOF ) - { - c = getc( infile ); - if( c != EOF ) - { - putChar( c ); - } - } - } - else - { - while( c != '\n' && c != EOF ) - { - c = getc( infile ); - } - } - } - - /* - if( c != EOF ) - { - putChar( c ); - } - */ - - return( c ); -} - -static int getWordFromString (char *aLine, char *buffer, int charIdx ) -{ - int c; - int i; - - // Check to see if past the end - if (charIdx >= strlen(aLine)) - { - *buffer = '\0'; - return charIdx; - } - - 
/* - * skip preceeding blanks - */ - c = aLine[charIdx++]; - while( c == '\t' || c == ' ' || c == '\n' || c == '\r' ) - { - c = aLine[charIdx++]; - } - - /* - * read one word - */ - for( i = 0; i < TEST_BUFLEN - 1; i++, c = aLine[charIdx++] ) - { - - if( c == '\r' ) - { - continue; - } - - if( c == '\t' || c == ' ' || c == '\n' || c == '\r' ) - { - *buffer = '\0'; - return charIdx; - } - - *buffer++ = c; - } - - *buffer = '\0'; - return charIdx; -} - -/* Read one line at a time. Skip any leadings blanks, and send an end of file - as if a "q" command had been encountered. Return a string with the line read. */ -static void getLine( char * buffer ) -{ - int c; - int i; - - // skip preceeding blanks - c = ' '; - while( c == '\t' || c == ' ' || c == '\n' || c == '\r' ) - { - c = getChar(); - } - - // End of input file (even if we did not find a q! - if( c == EOF ) - { - *buffer++ = 'q'; - *buffer++ = '\0'; - return; - } - - // Read one line - for( i = 0; i < TEST_BUFLEN - 1; i++, c = getChar() ) - { - - if( c == EOF || c == '\n' || c == '\r' ) - { - *buffer = '\0'; - return; - } - - *buffer++ = c; - } - - *buffer = '\0'; -} - -/* Expands escapes from a sequence of characters to null terminated string - * - * src must be a sequence of characters. - * len is the size of the sequence of characters to convert. 
- * - * returns a string of size 2 * len + 1 will always be sufficient - * - */ - -char *expand_escapes(const char* src, int len) -{ - char c; - int i; - char* dest; - char* res; - - if ((res = malloc(2 * len + 1)) == NULL) - { - printf("Error allocating memory in expand_escapes.\n"); - return NULL; - } - dest = res; - - for (i=0 ; i (sizeof(getOpLabel) / sizeof(*getOpLabel))) - return "Invalid_get_OPERATOR"; - else - return getOpLabel[idx]; -} - -void usage(void) -{ - printf("Usage:\n"); - printf(" -f (file with batch commands)\n"); - printf(" -l (Key length for file)\n"); - printf(" -d (Type of DB to use: levelDB=1 mysql=3)\n"); - printf(" -t (Type of keys: int=1, longInt=2, float=3, " - "double=4, longDouble=5, string=6, byte=7)\n"); - printf(" -p (path where DB will be created)\n"); - printf(" -n (Name of DataBase file or directory)\n"); - printf(" -b (MLOG_CRIT=1, MLOG_DBG=2)\n"); - printf(" -a (DB store append mode. By default records with same key are " - "overwritten. This flag turns on the option to append to existing values.\n"); - printf(" -w This flag turns on the option to either allow or deny threads to do command based on if it is dividiable by the modlus of the modulus number\n"); - printf(" -r ~ This flag turns on the option to either allow or deny threads to do command based on if the rank falls inclusively inbetween the rank ranges. NOTE: You must use the '~' inbetween the numbers. Example: -r0~2\n"); - printf(" -q<0|1> (Quiet mode, default is verbose) 1=write out to log file\n"); - exit (8); -} - -// Release the memory used in a Bulk request. 
If values/value_lens not present just use NULL -void freeKeyValueMem(int nkeys, void **keys, int *key_lens, char **values, int *value_lens) -{ - int i; - for (i = 0; i < nkeys; i++) - { - if (keys[i]) free(keys[i]); - if (values && value_lens && value_lens[i] && values[i]) free(values[i]); - } - if (key_lens) free(key_lens); - if (keys) free(keys); - if (value_lens) free(value_lens); - if (values) free(values); -} - -//======================================FLUSH============================ -static void execFlush(char *command, struct mdhim_t *md, int charIdx) -{ - //Get the stats - int ret = mdhimStatFlush(md, md->primary_index); - - if (ret != MDHIM_SUCCESS) { - tst_say(1, "ERROR: rank %d executing flush.\n", md->mdhim_rank); - } else { - tst_say(0, "Flush executed successfully.\n"); - } - -} - -//======================================PUT============================ -static void execPut(char *command, struct mdhim_t *md, int charIdx) -{ - // int i_key; - // long l_key; - // float f_key; - // double d_key; - struct mdhim_brm_t *brm; - unsigned char str_key [ TEST_BUFLEN ]; - //unsigned char buffer2 [ TEST_BUFLEN ]; - unsigned char value [ TEST_BUFLEN ] ; - char fhandle [TEST_BUFLEN]; //Filename handle - //int data_index =0; //Index for reading data. 
- char data_read[TEST_BUFLEN]; - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - //int ret; - - if (verbose) tst_say(0, "# put key data\n" ); - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - int g=read(x, str_key, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - g=read(x, value, VAL_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - //fgets(data_read, TEST_BUFLEN, datafile); - //data_index = getWordFromString( data_read, str_key, data_index); - // Get value to store - - //charIdx = getWordFromString( data_read, buffer2, data_index); - //sprintf(value, "%s", buffer2); - // Based on key type generate a key using rank - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // i_key = atoi(str_key) * (md->mdhim_rank + 1); - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s) [int]\n", key_string, value ); - // brm = mdhimPut(md, &i_key, sizeof(i_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol(str_key) * (md->mdhim_rank + 1); - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s) [long]\n", key_string, value ); - // brm = mdhimPut(md, &l_key, sizeof(l_key), value, strlen(value)+1, NULL, NULL); - // break;e - // - // case MDHIM_FLOAT_KEY: - // f_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [float]\n", key_string, value ); - // brm = mdhimPut(md, &f_key, sizeof(f_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( str_key ) * 
(md->mdhim_rank + 1); - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [double]\n", key_string, value ); - // brm = mdhimPut(md, &d_key, sizeof(d_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - //sprintf(key_string, "%s", str_key, (md->mdhim_rank + 1)); - if (verbose) tst_say(0, "# mdhimPut( %s, %s) [string|byte]\n", str_key, value ); - brm = mdhimPut(md, (void*)str_key, BYTE_BUFLEN, value, - BYTE_BUFLEN, NULL, NULL); - // break; - // - // default: - // tst_say(1, "ERROR: unrecognized Key_type in execPut\n"); - // } - - // Report any error(s) - if (!brm || brm->error) - { - tst_say(1, "ERROR: rank %d putting key: %s with value: %s into MDHIM\n", - md->mdhim_rank, str_key, value); - } - else - { - tst_say(0, "Successfully put key/value into MDHIM\n"); - } - - // fclose(datafile); - close(x); - -} - -//======================================GET============================ -// Operations for getting a key/value from messages.h -// MDHIM_GET_EQ=0, MDHIM_GET_NEXT=1, MDHIM_GET_PREV=2 -// MDHIM_GET_FIRST=3, MDHIM_GET_LAST=4 -static void execGet(char *command, struct mdhim_t *md, int charIdx) -{ - // int i_key; - // long l_key; - // float f_key; - // double d_key; - struct mdhim_bgetrm_t *bgrm; - unsigned char str_key [ TEST_BUFLEN ]; - char buffer2 [ TEST_BUFLEN ]; - char key_string [ TEST_BUFLEN ]; - char returned_key [ TEST_BUFLEN ]; - char ophandle [TEST_BUFLEN]; - int getOp, newIdx, nkeys=100; - char fhandle [TEST_BUFLEN]; //Filename handle - int data_index =0; //Index for reading data. 
- char data_read[TEST_BUFLEN]; - int k; - - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - memset(key_string, 0, TEST_BUFLEN); - memset(returned_key, 0, TEST_BUFLEN); - memset(ophandle, 0, TEST_BUFLEN); - - if (verbose) tst_say(0, "# get key \n" ); - charIdx = getWordFromString( command, buffer2, charIdx); - nkeys = atoi(buffer2); - newIdx= getWordFromString( command, ophandle, charIdx); - if (newIdx != charIdx) - { - charIdx= newIdx; - if(strcmp(ophandle,"EQUAL")==0) getOp=MDHIM_GET_EQ; - else if(strcmp(ophandle,"PREV")==0) getOp=MDHIM_GET_PREV; - else if(strcmp(ophandle,"NEXT")==0) getOp=MDHIM_GET_NEXT; - else if(strcmp(ophandle,"FIRST")==0) getOp=MDHIM_GET_FIRST; - else if(strcmp(ophandle,"LAST")==0) getOp=MDHIM_GET_LAST; - else getOp=MDHIM_GET_EQ; - } - else - { - getOp = MDHIM_GET_EQ; //Default a get with an equal operator - } - - charIdx = getWordFromString( command, fhandle, newIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - - - // datafile = fopen( fhandle, "r" ); - // if( !datafile ) - // { - // fprintf( stderr, "Failed to open data file %s, %s\n", - // fhandle, strerror( errno )); - // exit( -1 ); - // } - // fgets(data_read, TEST_BUFLEN, datafile); - // - // data_index= getWordFromString( data_read, str_key, data_index); - // newIdx = getWordFromString( data_read, buffer2, data_index); - // - for (k=0; kmdhim_rank + 1); - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimGet( %s, %s ) [int]\n", key_string, getValLabel(getOp)); - // bgrm = mdhimGet(md, md->primary_index, &i_key, sizeof(i_key), getOp); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimGet( %s, %s ) [long]\n", key_string, getValLabel(getOp)); - // bgrm = mdhimGet(md, md->primary_index, 
&l_key, sizeof(l_key), getOp); - // break; - // - // case MDHIM_FLOAT_KEY: - // f_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimGet( %s, %s ) [float]\n", key_string, getValLabel(getOp)); - // bgrm = mdhimGet(md, md->primary_index, &f_key, sizeof(f_key), getOp); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimGet( %s, %s ) [double]\n", key_string, getValLabel(getOp)); - // bgrm = mdhimGet(md, md->primary_index, &d_key, sizeof(d_key), getOp); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - //sprintf(key_string, "%s", str_key); - - if (verbose) tst_say(0, "# mdhimGet( %s, %s ) [string|byte]\n", str_key, getValLabel(getOp)); - if(strcmp(ophandle,"EQUAL")==0) bgrm = mdhimGet(md, md->primary_index, (void *)str_key, BYTE_BUFLEN+1, getOp); - else if(strcmp(ophandle,"PREV")==0) bgrm = mdhimBGetOp(md, md->primary_index, (void *)str_key, BYTE_BUFLEN+1, 1, getOp); - else if(strcmp(ophandle,"NEXT")==0) bgrm = mdhimBGetOp(md, md->primary_index, (void *)str_key, BYTE_BUFLEN+1, 1, getOp); - else if(strcmp(ophandle,"FIRST")==0) bgrm =mdhimBGetOp(md, md->primary_index, (void *)str_key, BYTE_BUFLEN+1, 1, getOp); - else { - if (strcmp(ophandle,"LAST")==0) bgrm = mdhimBGetOp(md, md->primary_index, (void *)str_key, BYTE_BUFLEN+1, 1, getOp); - } - //bgrm = mdhimGet(md, md->primary_index, (void *)str_key, BYTE_BUFLEN, getOp); - // break; - // - // default:mdhimBGet(md, md->primary_index, keys, key_lens, nkeys, MDHIM_GET_EQ); - // else {tst_say(1, "Error, unrecognized Key_type in execGet\n"); - // return; - // } - - if (!bgrm || bgrm->error) - { - tst_say(1, "ERROR: rank %d getting value for key (%s): %s from MDHIM\n", - md->mdhim_rank, getValLabel(getOp), key_string); - } - //printf("This is the strlen for bgrm->keys[0]: %s", bgrm->keys[0]); - else if (bgrm->keys[0] 
&& bgrm->values[0]) - { - // Generate correct string from returned key - switch (key_type) - { - case MDHIM_INT_KEY: - sprintf(returned_key, "[int]: %d", *((int *) bgrm->keys[0])); - break; - - case MDHIM_LONG_INT_KEY: - sprintf(returned_key, "[long]: %ld", *((long *) bgrm->keys[0])); - break; - - case MDHIM_FLOAT_KEY: - sprintf(returned_key, "[float]: %f", *((float *) bgrm->keys[0])); - break; - - case MDHIM_DOUBLE_KEY: - sprintf(returned_key, "[double]: %e", *((double *) bgrm->keys[0])); - break; - - case MDHIM_STRING_KEY: - case MDHIM_BYTE_KEY: - sprintf(returned_key, "[string|byte]: %s", (char *)bgrm->keys[0]); - - } - - //if ( v_value == NULL ) // No verification value, anything is OK. - if (bgrm->values[0] != NULL || bgrm->value_lens[0] != 0) - { - tst_say(0, "Successfully get(%s) correct value: %s for key %s from MDHIM\n", - getValLabel(getOp), expand_escapes(bgrm->values[0], bgrm->value_lens[0]+1), returned_key); - } - - } - else - { - tst_say(1, "ERROR: rank %d got(%s) null value or return key for key: %s from MDHIM\n", - md->mdhim_rank, getValLabel(getOp), key_string); - } - - } - close(x); -} - -//======================================BPUT============================ -static void execBput(char *command, struct mdhim_t *md, int charIdx) -{ - int nkeys = 100; - int ret; - char buffer1 [ TEST_BUFLEN ]; - unsigned char str_key [ TEST_BUFLEN ]; - unsigned char value [ TEST_BUFLEN ]; - struct mdhim_brm_t *brm, *brmp; - int i;// size_of; - void **keys; - int *key_lens; - char **values; - int *value_lens; - char fhandle [TEST_BUFLEN]; //Filename handle - //int data_index - //; //Index for reading data. 
- char data_read[TEST_BUFLEN]; - - - if (verbose) tst_say(0, "# bput n key data\n" ); - - - //Initialize variables - //size_of = 0; - keys = NULL; - - // Number of keys to generate - charIdx = getWordFromString( command, buffer1, charIdx); - nkeys = atoi( buffer1 ); - - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - int g; - - key_lens = malloc(sizeof(int) * nkeys); - value_lens = malloc(sizeof(int) * nkeys); - - if (verbose) tst_say(0, "# mdhimBPut(%d, %s, %s )\n", nkeys, str_key, value ); - - // Allocate memory and size of key (size of string|byte key will be modified - // when the key is constructed.) - values = malloc(sizeof(char *) * nkeys); - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // keys = malloc(sizeof(int *) * nkeys); - // size_of = sizeof(int); - // break; - // - // case MDHIM_LONG_INT_KEY: - // keys = malloc(sizeof(long *) * nkeys); - // size_of = sizeof(long); - // break; - // - // case MDHIM_FLOAT_KEY: - // keys = malloc(sizeof(float *) * nkeys); - // size_of = sizeof(float); - // break; - // - // case MDHIM_DOUBLE_KEY: - // keys = malloc(sizeof(double *) * nkeys); - // size_of = sizeof(double); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - keys = malloc(sizeof(char *) * nkeys); - //size_of = sizeof(char); - // break; - // } - - //int q =0; - //while (q==0) sleep(5); - // Create the keys and values to store - for (i = 0; i < nkeys; i++) - { - //data_index = 0; - memset(str_key, 0, TEST_BUFLEN); - memset(value, 0, TEST_BUFLEN); - g=read(x, str_key, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - g=read(x, value, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to 
read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - g=read(x, fhandle,1); //Need it to get rid of the new line. - - keys[i] = malloc(BYTE_BUFLEN+1); - memset(keys[i], 0, BYTE_BUFLEN+1); - key_lens[i] = BYTE_BUFLEN; //size_of; - values[i] = malloc(sizeof(unsigned char) * BYTE_BUFLEN+1); - memset(values[i], 0, BYTE_BUFLEN+1); - memcpy(values[i], value, VAL_BUFLEN); - value_lens[i] = BYTE_BUFLEN;//strlen(values[i]) + 1; - - - - // Based on key type, rank and index number generate a key - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // { - // int **i_keys = (int **)keys; - // *i_keys[i] = (atoi( str_key ) * (md->mdhim_rank + 1)); - // if (verbose) tst_say(0, "Rank: %d - Creating int key (to insert): " - // "%d with value: %s\n", - // md->mdhim_rank, *i_keys[i], values[i]); - // } - // break; - // - // case MDHIM_LONG_INT_KEY: - // { - // long **l_keys = (long **)keys; - // *l_keys[i] = (atol( str_key ) * (md->mdhim_rank + 1)); - // if (verbose) tst_say(0, "Rank: %d - Creating long key (to insert): " - // "%ld with value: %s\n", - // md->mdhim_rank, *l_keys[i], values[i]); - // } - // break; - // - // case MDHIM_FLOAT_KEY: - // { - // float **f_keys = (float **)keys; - // *f_keys[i] = (atof( str_key ) * (md->mdhim_rank + 1)) + (i + 1); - // if (verbose) tst_say(0, "Rank: %d - Creating float key (to insert): " - // "%f with value: %s\n", - // md->mdhim_rank, *f_keys[i], values[i]); - // } - // break; - // - // case MDHIM_DOUBLE_KEY: - // { - // double **d_keys = (double **)keys; - // *d_keys[i] = (atof( str_key ) * (md->mdhim_rank + 1)); - // if (verbose) tst_say(0, "Rank: %d - Creating double key (to insert): " - // "%e with value: %s\n", - // md->mdhim_rank, *d_keys[i], values[i]); - // } - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - // { - //unsigned char **s_keys = (unsigned char **)keys; - //s_keys[i] = malloc(TEST_BUFLEN); - //sprintf(s_keys[i], "%s", str_key); - memcpy(keys[i], str_key, BYTE_BUFLEN); 
- key_lens[i] = BYTE_BUFLEN; - if (verbose) tst_say(0, "Rank: %d - Creating string|byte key " - "(to insert): %s with value: %s\n", - md->mdhim_rank, (unsigned char *)keys[i], values[i]); - // } - // break; - //} - } - - //Insert the keys into MDHIM - brm = mdhimBPut(md, keys, key_lens, (void **) values, value_lens, nkeys, NULL, NULL); - brmp = brm; - ret = 0; - if (!brm || brm->error) - { - tst_say(1, "ERROR: rank - %d bulk inserting keys/values into MDHIM\n", - md->mdhim_rank); - ret = 1; - } - - while (brmp) - { - if (brmp->error < 0) - { - tst_say(1, "ERROR: rank %d - Error bulk inserting key/values info MDHIM\n", - md->mdhim_rank); - ret = 1; - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - // if NO errors report success - if (!ret) - { - tst_say(0, "Rank: %d - Successfully bulk inserted key/values into MDHIM\n", - md->mdhim_rank); - } - - // Release memory - freeKeyValueMem(nkeys, keys, key_lens, values, value_lens); - close(x); - //fclose(datafile); -} - -//======================================BGET============================ -static void execBget(char *command, struct mdhim_t *md, int charIdx) -{ - int nkeys = 100; - char buffer [ TEST_BUFLEN ]; - unsigned char str_key [ TEST_BUFLEN ]; - char *v_value = NULL; - if (verbose) tst_say(0, "# bget n key \n" ); - struct mdhim_bgetrm_t *bgrm, *bgrmp; - int i, /*size_of,*/ ret; - void **keys; - int *key_lens; - int totRecds; - char fhandle[TEST_BUFLEN]; - char data_read[TEST_BUFLEN]; - char opname[TEST_BUFLEN]; - //int data_index; - //size_of = 0; - keys = NULL; - - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - - - // Get the number of records to create for bget - charIdx = getWordFromString( command, buffer, charIdx); - nkeys = atoi( buffer ); - //printf("Here is the nkeys: %d\n", nkeys); - charIdx = getWordFromString( command, opname, charIdx); - - charIdx = getWordFromString( command, fhandle, charIdx); - //printf("Here is 
the file name: %s\n", fhandle); - // datafile = fopen( fhandle, "r" ); - // if( !datafile ) - // { - // fprintf( stderr, "Failed to open data file %s, %s\n", - // fhandle, strerror( errno )); - // exit( -1 ); - // } - - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - int g; - key_lens = malloc(sizeof(int) * nkeys); - - - if (verbose) tst_say(0, "# mdhimBGet(%d, %s)\n", nkeys, str_key ); - - // Allocate memory and size of key (size of string|byte key will be modified - // when the key is constructed.) - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // keys = malloc(sizeof(int *) * nkeys); - // size_of = sizeof(int); - // break; - // - // case MDHIM_LONG_INT_KEY: - // keys = malloc(sizeof(long *) * nkeys); - // size_of = sizeof(long);fcreat - // break; - // - // case MDHIM_FLOAT_KEY: - // keys = malloc(sizeof(float *) * nkeys); - // size_of = sizeof(float); - // break; - // - // case MDHIM_DOUBLE_KEY: - // keys = malloc(sizeof(double *) * nkeys); - // size_of = sizeof(double); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - keys = malloc(sizeof(unsigned char *) * nkeys); - //size_of = sizeof(char); - // break; - // } - - - // Generate the keys as set above - for (i = 0; i < nkeys; i++) - { - //data_index = 0; - // fgets(data_read, TEST_BUFLEN, datafile); - memset(str_key, 0, BYTE_BUFLEN); - // data_index = getWordFromString( data_read, str_key, data_index); - keys[i] = malloc(BYTE_BUFLEN+1); - memset(keys[i],0,BYTE_BUFLEN+1); - g=0; - // Based on key type, rank and index number generate a key - g=read(x, str_key, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - g=read(x, fhandle,1); //Need it to get rid of the new line. 
- - memcpy(keys[i], str_key, BYTE_BUFLEN); - key_lens[i] = BYTE_BUFLEN+1; - if (verbose) tst_say(0, "Rank: %d - Creating string|byte key (to get):" - " %s\n", md->mdhim_rank, keys[i]); - // } - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execBGet\n"); - // return; - // } - // - } - - //Get the values back for each key retrieved; - bgrm = mdhimBGet(md, md->primary_index, keys, key_lens, nkeys, MDHIM_GET_EQ); - - - ret = 0; // Used to determine if any errors are encountered - - totRecds = 0; - bgrmp = bgrm; - - while (bgrmp) { - if (bgrmp->error < 0) - { - tst_say(1, "ERROR: rank %d retrieving values\n", md->mdhim_rank); - ret = 1; - } - - totRecds += bgrmp->num_keys; - for (i = 0; i < bgrmp->num_keys && bgrmp->error >= 0; i++) - { - //if ( v_value != NULL ) - // sprintf(buffer, "%s_%d", v_value, i + 1); //Value to verify - - if (verbose) tst_say(0, "Rank: %d successfully get[%d] correct value: %s from MDHIM key %s\n", - md->mdhim_rank, i, expand_escapes(bgrmp->values[i], bgrmp->value_lens[i]+1), bgrmp->keys[i]); - - } - - bgrmp = bgrmp->next; - //Free the message received - mdhim_full_release_msg(bgrm); - bgrm = bgrmp; - } - - // if NO errors report success - if (!ret) - { - if (totRecds) - tst_say(0, "Rank: %d - Successfully bulk retrieved %d key/values from MDHIM\n", - md->mdhim_rank, totRecds); - else - tst_say(1, "ERROR: rank %d got no records for bulk retrieved from MDHIM\n", - md->mdhim_rank); - } - - // Release memory - freeKeyValueMem(nkeys, keys, key_lens, NULL, NULL); - free(v_value); - close(x); -} - -//======================================BGETOP============================ -static void execBgetOp(char *command, struct mdhim_t *md, int charIdx) -{ - int nrecs = 100; - char buffer1 [ TEST_BUFLEN ]; - char key_string [ TEST_BUFLEN ]; - struct mdhim_bgetrm_t *bgrm; - int i, getOp, newIdx, data_index; - char ophandle[TEST_BUFLEN]; - // int i_key; - // long l_key; - // float f_key; - // double d_key; - char 
fhandle[TEST_BUFLEN]; - char str_key[TEST_BUFLEN]; - - if (verbose) tst_say(0, "# bgetop n key op\n" ); - - bgrm = NULL; - // Get the number of records to retrieve in bgetop - charIdx = getWordFromString( command, buffer1, charIdx); - nrecs = atoi( buffer1 ); - - // Get the key to use as starting point - - newIdx = getWordFromString( command, ophandle, charIdx); - if (newIdx != charIdx) - { - charIdx = newIdx; - if(strcmp(ophandle,"EQUAL")==0) getOp=MDHIM_GET_EQ; - else if(strcmp(ophandle,"PREV")==0) getOp=MDHIM_GET_PREV; - else if(strcmp(ophandle,"NEXT")==0) getOp=MDHIM_GET_NEXT; - else if(strcmp(ophandle,"FIRST")==0) getOp=MDHIM_GET_FIRST; - else if(strcmp(ophandle,"LAST")==0) getOp=MDHIM_GET_LAST; - else getOp=MDHIM_GET_EQ; - } - else - { - getOp = MDHIM_GET_EQ; //Default a get with an equal operator - } - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - // Get the operation type to use - - if (verbose) tst_say(0, "# mdhimBGetOp(%d, %s)\n", - nrecs, getValLabel(getOp) ); - - - data_index = read(x, str_key, BYTE_BUFLEN); - if( data_index<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - data_index=read(x,buffer1, 1); - // Based on key type generate a key using rank - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // i_key = atoi( key_string ) * (md->mdhim_rank + 1) + 1; - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimBGetOp( %d, %s, %s ) [int]\n", - // nrecs, key_string, getValLabel(getOp)); - // bgrm = mdhimBGetOp(md, md->primary_index, &i_key, sizeof(i_key), nrecs, getOp); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol( key_string ) * (md->mdhim_rank + 1) + 1; - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimBGetOp( %d, %s, %s ) [long]\n", - // 
nrecs, key_string, getValLabel(getOp)); - // bgrm = mdhimBGetOp(md, md->primary_index, &l_key, sizeof(l_key), nrecs, getOp); - // break; - // - // case MDHIM_FLOAT_KEY: - // f_key = atof( key_string ) * (md->mdhim_rank + 1) + 1; - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimBGetOp( %d, %s, %s ) [float]\n", - // nrecs, key_string, getValLabel(getOp)); - // bgrm = mdhimBGetOp(md, md->primary_index, &f_key, sizeof(f_key), nrecs, getOp); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( key_string ) * (md->mdhim_rank + 1) + 1; - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimBGetOp( %d, %s, %s ) [double]\n", - // nrecs, key_string, getValLabel(getOp)); - // bgrm = mdhimBGetOp(md, md->primary_index, &d_key, sizeof(d_key), nrecs, getOp); - // break; - // - // case MDHIM_STRING_KEY:// case MDHIM_STRING_KEY: - - // case MDHIM_BYTE_KEY: - memset(key_string, 0, TEST_BUFLEN); - memcpy(key_string, str_key, BYTE_BUFLEN); - //sprintf(key_string, "%s", key_string); - //if (verbose) tst_say(0, "# mdhimBGetOp( %d, %s, %s ) [string|byte]\n", - // nrecs, key_string, getValLabel(getOp)); - bgrm = mdhimBGetOp(md, md->primary_index, (void *)key_string, BYTE_BUFLEN+1, - nrecs, getOp); - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execGet\n"); - // } - - if (!bgrm || bgrm->error) - { - tst_say(1, "ERROR: rank %d getting %d values for start key (%s): %s from MDHIM\n", - md->mdhim_rank, nrecs, getValLabel(getOp), key_string); - } - else if (verbose) - { - for (i = 0; i < bgrm->num_keys && !bgrm->error; i++) - { - tst_say(0, "Rank: %d - Got value[%d]: %s for start key: %s from MDHIM\n", - md->mdhim_rank, i, expand_escapes(bgrm->values[i], bgrm->value_lens[i]+1), - key_string); - } - } - else - { - tst_say(0, "Rank: %d - Successfully got %d values for start key: %s from MDHIM\n", - md->mdhim_rank, bgrm->num_keys, key_string); - } - - //Free the message received - 
mdhim_full_release_msg(bgrm); -} - -//======================================DEL============================ -static void execDel(char *command, struct mdhim_t *md, int charIdx) -{ - // int i_key; - // long l_key; - // float f_key; - // double d_key; - char str_key [ TEST_BUFLEN ]; - char key_string [ TEST_BUFLEN ]; - struct mdhim_brm_t *brm; - char fhandle [TEST_BUFLEN]; //Filename handle - int data_index =0; //Index for reading data. - char data_read[TEST_BUFLEN]; - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - data_index=read(x, str_key, BYTE_BUFLEN); - if( data_index<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - if (verbose) tst_say(0, "# del key\n" ); - - brm = NULL; - //charIdx = getWordFromString( data_read, str_key, data_index); - - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // i_key = atoi( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimDelete( %s ) [int]\n", key_string); - // brm = mdhimDelete(md, md->primary_index, &i_key, sizeof(i_key)); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimDelete( %s ) [long]\n", key_string); - // brm = mdhimDelete(md, md->primary_index, &l_key, sizeof(l_key)); - // break; - // - // case MDHIM_FLOAT_KEY: - // f_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimDelete( %s ) [float]\n", key_string); - // brm = mdhimDelete(md, md->primary_index, &f_key, sizeof(f_key)); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( 
str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimDelete( %s ) [double]\n", key_string); - // brm = mdhimDelete(md, md->primary_index, &d_key, sizeof(d_key)); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - //sprintf(key_string, "%s", str_key); - if (verbose) tst_say(0, "# mdhimDelete( %s ) [string|byte]\n", key_string); - brm = mdhimDelete(md, md->primary_index, (void *)str_key, BYTE_BUFLEN); - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execDelete\n"); - // } - - if (!brm || brm->error) - { - tst_say(1, "ERROR: rank %d deleting key/value from MDHIM. key: %s\n", - md->mdhim_rank, str_key); - } - else - { - tst_say(0, "Successfully deleted key/value from MDHIM. key: %s\n", str_key); - } - -} - -//======================================BDEL============================ -static void execBdel(char *command, struct mdhim_t *md, int charIdx) -{ - int nkeys = 100; - char buffer1 [ TEST_BUFLEN ]; - unsigned char str_key [ TEST_BUFLEN ]; - void **keys; - int *key_lens; - struct mdhim_brm_t *brm, *brmp; - int i, /*size_of, */ ret; - char fhandle [TEST_BUFLEN]; //Filename handle - int data_index; //Index for reading data. 
- char data_read[TEST_BUFLEN]; - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - - if (verbose) tst_say(0, "# bdel n key\n" ); - - keys = NULL; - //size_of = 0; - - // Number of records to delete - charIdx = getWordFromString( command, buffer1, charIdx); - nkeys = atoi( buffer1 ); - key_lens = malloc(sizeof(int) * nkeys); - - // Starting key value - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - // datafile = fopen( fhandle, "r" ); - // if( !datafile ) - // { - // fprintf( stderr, "Failed to open data file %s, %s\n", - // fhandle, strerror( errno )); - // exit( -1 ); - // } - - - if (verbose) tst_say(0, "# mdhimBDelete(%d, %s )\n", nkeys, str_key ); - - // Allocate memory and size of key (size of string|byte key will be modified - // when the key is constructed.) - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // keys = malloc(sizeof(int *) * nkeys); - // size_of = sizeof(int); - // break; - // - // case MDHIM_LONG_INT_KEY: - // keys = malloc(sizeof(long *) * nkeys); - // size_of = sizeof(long); - // break; - // - // case MDHIM_FLOAT_KEY: - // keys = malloc(sizeof(float *) * nkeys); - // size_of = sizeof(float); - // break; - // - // case MDHIM_DOUBLE_KEY: - // keys = malloc(sizeof(double *) * nkeys); - // size_of = sizeof(double); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - keys = malloc(sizeof(char *) * nkeys); - // size_of = sizeof(char); - // break; - // } - - for (i = 0; i < nkeys; i++) - { - data_index =0; - data_index=read(x, str_key, BYTE_BUFLEN); - if( data_index<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - keys[i] = malloc(BYTE_BUFLEN+1); - key_lens[i] = BYTE_BUFLEN; - data_index=read(x, fhandle,1); //Need it to get rid of the new line. 
- - // Based on key type, rank and index number generate a key - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // { - // int **i_keys = (int **)keys; - // *i_keys[i] = (atoi( str_key ) * (md->mdhim_rank + 1)) + (i + 1); - // if (verbose) tst_say(0, "Rank: %d - Creating int key (to delete): %d\n", - // md->mdhim_rank, *i_keys[i]); - // } - // break; - // - // case MDHIM_LONG_INT_KEY: - // { - // long **l_keys = (long **)keys; - // *l_keys[i] = (atol( str_key ) * (md->mdhim_rank + 1)) + (i + 1); - // if (verbose) tst_say(0, "Rank: %d - Creating long key (to delete): %ld\n", - // md->mdhim_rank, *l_keys[i]); - // } - // break; - // - // case MDHIM_FLOAT_KEY: - // { - // float **f_keys = (float **)keys; - // *f_keys[i] = (atof( str_key ) * (md->mdhim_rank + 1)) + (i + 1); - // if (verbose) tst_say(0, "Rank: %d - Creating float key (to delete): %f\n", - // md->mdhim_rank, *f_keys[i]); - // } - // break; - // - // case MDHIM_DOUBLE_KEY: - // { - // double **d_keys = (double **)keys; - // *d_keys[i] = (atof( str_key ) * (md->mdhim_rank + 1)) + (i + 1); - // if (verbose) tst_say(0, "Rank: %d - Creating double key (to delete): " - // " %e\n", md->mdhim_rank, *d_keys[i]); - // } - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - // { - // char **s_keys = (char **)keys; - // s_keys[i] = malloc(BYTE_BUFLEN); - memset(keys[i],0,BYTE_BUFLEN+1); - memcpy(keys[i], str_key, BYTE_BUFLEN); - key_lens[i] = BYTE_BUFLEN; - if (verbose) tst_say(0, "Rank: %d - Creating string|byte key (to delete):" - " %s\n", md->mdhim_rank, keys[i]); - // } - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execBDel\n"); - // return; - // } - - } - - //Delete the records - brm = mdhimBDelete(md, md->primary_index, (void **) keys, key_lens, nkeys); - brmp = brm; - if (!brm || brm->error) { - tst_say(1, "ERROR: rank %d deleting keys/values from MDHIM\n", - md->mdhim_rank); - } - - ret = 0; - while (brmp) - { - if (brmp->error < 0) - { - tst_say(1, 
"ERROR: rank %d deleting keys\n", md->mdhim_rank); - ret = 1; - } - - brmp = brmp->next; - //Free the message - mdhim_full_release_msg(brm); - brm = brmp; - } - - // if NO errors report success - if (!ret) - { - tst_say(0, "Rank: %d - Successfully bulk deleted key/values from MDHIM\n", - md->mdhim_rank); - } - - // Release memory - freeKeyValueMem(nkeys, keys, key_lens, NULL, NULL); - close(x); -} - -// Generate a random string of up to max_len -char *random_string( int max_len, int exact_size) -{ - int len; - char *retVal; - int i; - - if (exact_size) - len = max_len; - else - len = rand() % max_len + 1; - retVal = (char *) malloc( len + 1 ); - - for (i=0; i 0) free(value); - memset(str_key, 0, sizeof(str_key)); - memset(buffer2, 0, sizeof(buffer2)); - g=read(x, str_key, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - // data_index= getWordFromString( data_read, str_key, data_index); - // data_index = getWordFromString( data_read, value, data_index); - - g=read(x, buffer2, BYTE_BUFLEN); - if( g<0 ) - { - fprintf( stderr, "Failed to read data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - g=read(x, fhandle,1); //Need it to get rid of the new line. 
- // Based on key type generate appropriate random key - // switch (key_type) - // { - // case MDHIM_INT_KEY: - // i_key = atoi(str_key) * (md->mdhim_rank + 1); - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [int]\n", - // key_string, value ); - // brm = mdhimPut(md, &i_key, sizeof(i_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol(str_key) * (md->mdhim_rank + 1); - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [long]\n", - // key_string, value ); - // brm = mdhimPut(md, &l_key, sizeof(l_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_FLOAT_KEY: - // f_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [float]\n", - // key_string, value ); - // brm = mdhimPut(md, &f_key, sizeof(f_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [double]\n", - // key_string, value ); - // brm = mdhimPut(md, &d_key, sizeof(d_key), value, strlen(value)+1, NULL, NULL); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - - memset(value,0 ,VAL_BUFLEN+1); - memset(key_string, 0,BYTE_BUFLEN+1); - memcpy(key_string, str_key,BYTE_BUFLEN); - memcpy(value, buffer2,VAL_BUFLEN); - if (verbose) tst_say(0, "# mdhimPut( %s, %s ) [string|byte]\n", - key_string, value ); - brm = mdhimPut(md, (void *)key_string, BYTE_BUFLEN, - value, BYTE_BUFLEN, NULL, NULL); - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execNput\n"); - // } - - // Record any error(s) - if (!brm || brm->error) - { - if (verbose) tst_say(1, "ERROR: rank %d N putting key: %s with value: %s " - "into MDHIM\n", md->mdhim_rank, key_string, value); - ret 
++; - } - - } - - // Report any error(s) - if (ret) - { - tst_say(1, "ERROR: rank %d - %d error(s) N putting key/value into MDHIM\n", - md->mdhim_rank, ret); - } - else - { - tst_say(0, "Successfully N put %d key/values into MDHIM\n", n_iter); - } - - close(x); -} - -//======================================NGETN============================ -static void execNgetn(char *command, struct mdhim_t *md, int charIdx) -{ - // int i_key; - // long l_key; - // float f_key; - // double d_key; - struct mdhim_bgetrm_t *bgrm; - char buffer [ TEST_BUFLEN ]; - unsigned char key_string[TEST_BUFLEN]; - int n_iter; - int ret, i; - int getOp; - //int newIdx; - char fhandle [TEST_BUFLEN]; //Filename handle - int data_index =0; //Index for reading data. - char data_read[TEST_BUFLEN]; - unsigned char str_key[TEST_BUFLEN]; - char buffer2[TEST_BUFLEN]; - memset(fhandle, 0, sizeof(fhandle)); - memset(data_read, 0, sizeof(data_read)); - - charIdx = getWordFromString( command, buffer, charIdx); - n_iter = atoi( buffer ); // Get number of iterations - - - charIdx = getWordFromString( command, fhandle, charIdx); - int x = open( fhandle, O_RDONLY ); - if( x <0 ) - { - fprintf( stderr, "Failed to open data file %s, %s\n", - fhandle, strerror( errno )); - exit( -1 ); - } - - - // datafile = fopen( fhandle, "r" ); - // if( !datafile ) - // { - // fprintf( stderr, "Failed to open data file %s, %s\n", - // fhandle, strerror( errno )); - // exit( -1 ); - // } - for (i=0; imdhim_rank + 1); - // sprintf(key_string, "%d", i_key); - // if (verbose) tst_say(0, "# mdhimGet( %s ) [int]\n", key_string ); - // bgrm = mdhimGet(md, md->primary_index, &i_key, sizeof(i_key), getOp); - // break; - // - // case MDHIM_LONG_INT_KEY: - // l_key = atol( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%ld", l_key); - // if (verbose) tst_say(0, "# mdhimGet( %s ) [long]\n", key_string ); - // bgrm = mdhimGet(md, md->primary_index, &l_key, sizeof(l_key), getOp); - // break; - // - // case MDHIM_FLOAT_KEY: - // 
f_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%f", f_key); - // if (verbose) tst_say(0, "# mdhimGet( %s ) [float]\n", key_string ); - // bgrm = mdhimGet(md, md->primary_index, &f_key, sizeof(f_key), getOp); - // break; - // - // case MDHIM_DOUBLE_KEY: - // d_key = atof( str_key ) * (md->mdhim_rank + 1); - // sprintf(key_string, "%e", d_key); - // if (verbose) tst_say(0, "# mdhimGet( %s ) [double]\n", key_string ); - // bgrm = mdhimGet(md, md->primary_index, &d_key, sizeof(d_key), getOp); - // break; - // - // case MDHIM_STRING_KEY: - // case MDHIM_BYTE_KEY: - memcpy(key_string, str_key, BYTE_BUFLEN); - if (verbose) tst_say(0, "# mdhimGet( %s ) [string|byte]\n", key_string ); - bgrm = mdhimGet(md, md->primary_index, (void *)key_string, - BYTE_BUFLEN, getOp); - // break; - // - // default: - // tst_say(1, "Error, unrecognized Key_type in execNgetn\n"); - // } - - // Record any error(s) - // if (!grm || grm->error) - // { - // // For some reason could not get first record abort the request - // tst_say(1, "ERROR: rank %d N getting FIRST key: %s from MDHIM\n", - // md->mdhim_rank, key_string); - // ret++; - // } - // else if (grm->key && grm->value) - // { - // if (verbose) tst_say(0, "Successfully got FIRST value: %s for key " - // "[string|byte](%s) from MDHIM\n", - // expand_escapes(grm->value, grm->value_len), - // getValLabel(MDHIM_GET_NEXT)); - // - - //grm = mdhimGet(md, grm->key, grm->key_len, MDHIM_GET_NEXT); - // Record any error(s) - if (!bgrm || bgrm->error) - { - tst_say(1, "ERROR: rank %d N getting key[%d] from MDHIM. Abort request.\n", - md->mdhim_rank, i); - ret ++; - break; - } - else if (bgrm->keys[0] && bgrm->values[0]) - { - if (verbose) tst_say(0, "Successfully got value %d: %s for key " - "[string|byte](%s) from MDHIM\n", i, - expand_escapes(bgrm->values[0], bgrm->value_lens[0]), - getValLabel(getOp)); - } - else - { - tst_say(1, "ERROR: rank %d got null value or key at N get key[%d] " - "from MDHIM. 
Abort request.\n", md->mdhim_rank, i); - ret ++; - break; - } - } - // else - // { - // tst_say(1, "ERROR: rank %d got null value or return key for FIRST key (%s): %s from MDHIM\n", - // md->mdhim_rank, getValLabel(getOp), key_string); - // ret++; - // } - - // Report any error(s) - if (ret) - { - tst_say(1, "ERROR: rank %d got %d error(s) N getting key/value from MDHIM\n", - md->mdhim_rank, ret); - } - else - { - tst_say(0, "Successfully N got %d out of %d key/values desired from MDHIM\n", - i, n_iter); - } - close(x); -} - -/** - * test frame for the MDHIM - * - * This test frame calls the MDHIM subroutines, it is an interactive or batch file - * test frame which could be used for regression tests. - * - * When the program is called in verbose mode (not quiet) it also writes a log file - * for each rank with the name mdhimTst-#.log (where # is the rank) - * - * Interactive mode or commands read from input file. - * -- Interactive mode simply execute mdhimtst, (by default it is verbose) - * -- Batch mode mdhimtst -f -d -t <-quiet> - *
    - * Call the program mdhimtst from a UNIX or DOS shell. (with a -f for batch mode) - *
    - * Use the following commands to test the MDHIM subroutines supplied: - *
      - *
      - q       FOR QUIT
      - /////////open filename 
      - /////////transaction < START | COMMIT | ROLLBACK >
      - /////////close
      - /////////flush
      - put key data
      - bput n key data
      - /////////find index key < LT | LE | FI | EQ | LA | GE | GT >
      - /////////nfind n index key < LT | LE | FI | EQ | LA | GE | GT >
      - get key
      - bget n key
      - del key
      - bdel n key
      - /////////datalen
      - /////////readdata
      - /////////readkey index
      - /////////updatedata data
      - /////////updatekey index key
      - 
      - *
    - * Do the following if you want to run the test cases per hand - *
    - 1. Build the mdhimtst executable.          make all
    - 2. Run the test frame on a file.        mdhimtst tst0001.TST
    - 
    - * - *
- */ - -int main( int argc, char * argv[] ) -{ - char commands[ 1000 ] [ TEST_BUFLEN ]; // Command to be read - int cmdIdx = 0; // Command current index - int cmdTot = 0; // Total number of commands read - int charIdx; // Index to last processed character of a command line - char command [ TEST_BUFLEN ]; - char filename [ TEST_BUFLEN ]; - char *db_path = "mdhimTst"; - char *db_name = "mdhimTst-"; - int dowork = 1; - int dbug = 1; //MLOG_CRIT=1, MLOG_DBG=2 - int factor = 1; //Range server factor - int slice = 100000; //Range server slice size - - struct timeval begin, end; - long double time_spent; - - mdhim_options_t *db_opts; // Local variable for db create options to be passed - - int ret; - int provided = 0; - struct mdhim_t *md; - - int db_type = LEVELDB; //(data_store.h) - MPI_Comm comm; - //Variables to get range for - char *rs, *rso; //Holders for strsep - int ri = 0, rf = 0; //Rank range initial and rank range final - int rd; //Rank divider - //int rdc = 1; //Rank divider control - int rrc = 0; //Rank range control - - // Process arguments - infile = stdin; - while ((argc > 1) && (argv[1][0] == '-')) - { - switch (argv[1][1]) - { - case 'f': - printf("Input file: %s || ", &argv[1][2]); - infile = fopen( &argv[1][2], "r" ); - if( !infile ) - { - fprintf( stderr, "Failed to open %s, %s\n", - &argv[1][2], strerror( errno )); - exit( -1 ); - } - break; - - case 'l': - printf("Key length: %s || ", &argv[1][2]); - BYTE_BUFLEN = atoi( &argv[1][2]); - break; - - - case 'v': - printf("Value length: %s || ", &argv[1][2]); - VAL_BUFLEN = atoi( &argv[1][2]); - break; - - case 'd': // DataBase type (1=levelDB) - printf("Data Base type: %s || ", &argv[1][2]); - db_type = atoi( &argv[1][2] ); - break; - - case 't': - printf("Key type: %s || ", &argv[1][2]); - key_type = atoi( &argv[1][2] ); - break; - - case 'b': - printf("Debug mode: %s || ", &argv[1][2]); - dbug = atoi( &argv[1][2] ); - break; - - case 'p': - printf("DB Path: %s || ", &argv[1][2]); - db_path = 
&argv[1][2]; - break; - - case 'n': - printf("DB name: %s || ", &argv[1][2]); - db_name = &argv[1][2]; - break; - - case 'c': - printf("Range server factor: %s || ", &argv[1][2]); - factor = atoi( &argv[1][2] ); - break; - - case 's': - printf("Range server slice size: %s || ", &argv[1][2]); - slice = atoi( &argv[1][2] ); - break; - - case 'a': - printf("DB option append value is on || "); - dbOptionAppend = MDHIM_DB_APPEND; - break; - - case 'q': - to_log = atoi( &argv[1][2] ); - if (!to_log) - { - printf("Quiet mode || "); - verbose = 0; - } - else - { - printf("Quiet to_log file mode || "); - } - break; - case 'r': - rs=strdup(&argv[1][2]); - rso = strsep(&rs, "~"); - ri = atoi(rso); - rf = atoi(rs); - printf("Range: %d to %d || ", ri, rf); - break; - case 'w': - rd=atoi(&argv[1][2]); - printf("Range divider : %d ||", rd); - break; - case 'h': - usage(); - break; - default: - printf("Wrong Argument (it will be ignored): %s\n", argv[1]); - usage(); - } - - ++argv; - --argc; - } - printf("\n"); - - // Set the debug flag to the appropriate Mlog mask - switch (dbug) - { - case 2: - dbug = MLOG_DBG; - break; - - default: - dbug = MLOG_CRIT; - } - - // calls to init MPI for mdhim - argc = 1; // Ignore other parameters passed to program - ret = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); - if (ret != MPI_SUCCESS) - { - printf("Error initializing MPI with threads\n"); - exit(1); - } - - if (provided != MPI_THREAD_MULTIPLE) - { - printf("Not able to enable MPI_THREAD_MULTIPLE mode\n"); - exit(1); - } - - // Create options for DB initialization - db_opts = mdhim_options_init(); - mdhim_options_set_db_path(db_opts, db_path); - mdhim_options_set_db_name(db_opts, db_name); - mdhim_options_set_db_type(db_opts, db_type); - mdhim_options_set_key_type(db_opts, key_type); - mdhim_options_set_debug_level(db_opts, dbug); - mdhim_options_set_login_c(db_opts, "localhost", "root", "pass", "localhost", "stater", "pass"); - mdhim_options_set_server_factor(db_opts, 
factor); - mdhim_options_set_max_recs_per_slice(db_opts, slice); - mdhim_options_set_value_append(db_opts, dbOptionAppend); // Default is overwrite - - //Setup Login credientials to database - //mdhim_options_set_login_c(db_opts, host server, user name, user' password, statstics user name, statistic user name's password); - comm = MPI_COMM_WORLD; - md = mdhimInit(&comm, db_opts); - if (!md) - { - printf("Error initializing MDHIM\n"); - exit(1); - } - - /* initialization for random string generation */ - srand( time( NULL ) + md->mdhim_rank); - sc_len = strlen( sourceChars ); - - /* - * open the log file (one per rank if in verbose mode, otherwise write to stderr) - */ - if (verbose) - { - sprintf(filename, "./%s%d.log", db_name, md->mdhim_rank); - logfile = fopen( filename, "wb" ); - if( !logfile ) - { - fprintf( stderr, "can't open logfile, %s, %s\n", filename, - strerror( errno )); - exit( 1 ); - } - } - else - { - logfile = stderr; - } - - // Read all command(s) to execute - while( dowork && cmdIdx < 1000) - { - // read the next command - memset( commands[cmdIdx], 0, sizeof( command )); - errno = 0; - getLine( commands[cmdIdx]); - - if (verbose) tst_say(0, "\n##command %d: %s\n", cmdIdx, commands[cmdIdx]); - - // Is this the last/quit command? 
- if( commands[cmdIdx][0] == 'q' || commands[cmdIdx][0] == 'Q' ) - { - dowork = 0; - } - cmdIdx++; - } - cmdTot = cmdIdx -1; - - // Main command execute loop - for(cmdIdx=0; cmdIdx < cmdTot; cmdIdx++) - { - - memset( command, 0, sizeof( command )); - errno = 0; - - charIdx = getWordFromString( commands[cmdIdx], command, 0); - - if (verbose) tst_say(0, "\n##exec command: %s\n", command ); - gettimeofday(&begin, NULL); - // execute the command given - if( !strcmp( command, "put" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execPut(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "get" )) - { - - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc)execGet(commands[cmdIdx], md, charIdx); - } - else if ( !strcmp( command, "bput" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execBput(commands[cmdIdx], md, charIdx); - } - else if ( !strcmp( command, "bget" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execBget(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "del" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execDel(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "bdel" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execBdel(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "flush" )) - { - execFlush(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "nput" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execNput(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "ngetn" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execNgetn(commands[cmdIdx], md, charIdx); - } - else if( !strcmp( command, "bgetop" )) - { - if (check_rank_range(md->mdhim_rank, ri, rf)==rrc) execBgetOp(commands[cmdIdx], md, charIdx); - } - else - { - printf( "# q FOR QUIT\n" ); - //printf( "# open filename keyfile1,dkeyfile2,... 
update\n" ); - //printf( "# close\n" ); - printf( "# flush\n" ); - printf( "# put key value\n" ); - printf( "# bput n key value\n" ); - printf( "# nput n key_size value_size exact_size #(0=variable | 1=exact)\n"); - printf( "# get key getOp #(EQ=0 | NEXT=1 | PREV=2 | FIRST=3 | LAST=4)\n" ); - printf( "# bget n key\n" ); - printf( "# bgetop n key getOp #(NEXT=1 | FIRST=3)\n" ); - printf( "# ngetn n key_length exact_size #(0=variable | 1=exact)\n" ); - printf( "# del key\n" ); - printf( "# bdel n key\n" ); - - } - - gettimeofday(&end, NULL); - time_spent = (long double) (end.tv_sec - begin.tv_sec) + - ((long double) (end.tv_usec - begin.tv_usec)/1000000.0); - tst_say(0, "Seconds to %s : %Lf\n\n", commands[cmdIdx], time_spent); - } - - - if (errMsgIdx) - { - - - int i, errsInCmds = errMsgIdx; // Only list the errors up to now - for (i=0; i %d (first %d shown)\n", - md->mdhim_rank, errsInCmds, i); - } - else - { - tst_say(0, "\n==No errors for rank: %d\n", md->mdhim_rank); - } - - // Calls to finalize mdhim session and close MPI-communication - ret = mdhimClose(md); - if (ret != MDHIM_SUCCESS) - { - tst_say(1, "Error closing MDHIM\n"); - } - fclose(logfile); - - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - - return( 0 ); -} - diff --git a/meta/tests/tester/runAll.sh b/meta/tests/tester/runAll.sh deleted file mode 100644 index 50c02e84f..000000000 --- a/meta/tests/tester/runAll.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/sh -# -# echo "type INT" -# -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t1 -q -d3 -p./ -# rm mdhim.manifest -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t1 -q -d3 -p./ -# rm mdhim.manifest -# -echo "type LONG" - -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t2 -q -d3 -p./ -rm mdhim.manifest -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t2 -q -d3 -p./ -rm mdhim.manifest - -echo "type FLOAT" - -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t3 -q -d3 -p./ 
-rm mdhim.manifest -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t3 -q -d3 -p./ -rm mdhim.manifest - -echo "type DOUBLE" - -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t4 -q -d3 -p./ -rm mdhim.manifest -rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t4 -q -d3 -p./ -rm mdhim.manifest - -# echo "type STRING" -# -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t5 -q -d3 -p./ -# rm mdhim.manifest -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t5 -q -d3 -p./ -# rm mdhim.manifest -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestLarge.txt -t5 -q -d3 -p./ -# rm mdhim.manifest -# -# echo "type BYTE" -# -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestBasic.txt -t6 -q -d3 -p./ -# rm mdhim.manifest -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestNext.txt -t6 -q -d3 -p./ -# rm mdhim.manifest -# rm -rf ./mdhimTst* ; mpirun -np 2 ./mdhimtst -finTestLarge.txt -t6 -q -d3 -p./ -# rm mdhim.manifest diff --git a/server/src/Makefile.am b/server/src/Makefile.am index 634503de2..17db2994a 100644 --- a/server/src/Makefile.am +++ b/server/src/Makefile.am @@ -16,6 +16,7 @@ unifyfsd_SOURCES = \ margo_server.h \ unifyfs_client_rpc.c \ unifyfs_fops.h \ + unifyfs_fops_rpc.c \ unifyfs_global.h \ unifyfs_group_rpc.h \ unifyfs_group_rpc.c \ @@ -23,7 +24,6 @@ unifyfsd_SOURCES = \ unifyfs_inode.c \ unifyfs_inode_tree.h \ unifyfs_inode_tree.c \ - unifyfs_metadata_mdhim.h \ unifyfs_p2p_rpc.h \ unifyfs_p2p_rpc.c \ unifyfs_request_manager.c \ @@ -42,36 +42,6 @@ OPT_C_FLAGS = OPT_LD_FLAGS = OPT_LIBS = -if USE_MDHIM - - unifyfsd_SOURCES += \ - unifyfs_metadata_mdhim.c \ - unifyfs_fops_mdhim.c - - OPT_CPP_FLAGS += \ - -DUSE_MDHIM \ - -I$(top_srcdir)/meta/src \ - -I$(top_srcdir)/meta/src/uthash \ - -I$(top_srcdir)/meta/src/Mlog2 - - OPT_C_FLAGS += \ - $(LEVELDB_CFLAGS) \ - $(MPI_CFLAGS) - - OPT_LD_FLAGS += \ - $(LEVELDB_LDFLAGS) \ - $(MPI_CLDFLAGS) - - OPT_LIBS += \ - 
$(top_builddir)/meta/src/libmdhim.a \ - $(LEVELDB_LIBS) - -else # ! USE_MDHIM - - unifyfsd_SOURCES += unifyfs_fops_rpc.c - -endif # USE_MDHIM - if USE_PMIX OPT_C_FLAGS += -DUSE_PMIX OPT_LIBS += -lpmix diff --git a/server/src/extent_tree.c b/server/src/extent_tree.c index 48a5d7bb4..1a6815e2d 100644 --- a/server/src/extent_tree.c +++ b/server/src/extent_tree.c @@ -19,7 +19,6 @@ */ #include "extent_tree.h" -#include "unifyfs_metadata_mdhim.h" #undef MIN #define MIN(a, b) (a < b ? a : b) @@ -46,7 +45,7 @@ RB_GENERATE(ext_tree, extent_tree_node, entry, etn_compare_func) int extent_tree_init(struct extent_tree* tree) { memset(tree, 0, sizeof(*tree)); - pthread_rwlock_init(&(tree->rwlock), NULL); + ABT_rwlock_create(&(tree->rwlock)); RB_INIT(&(tree->head)); return 0; } @@ -57,7 +56,7 @@ int extent_tree_init(struct extent_tree* tree) void extent_tree_destroy(struct extent_tree* tree) { extent_tree_clear(tree); - pthread_rwlock_destroy(&(tree->rwlock)); + ABT_rwlock_free(&(tree->rwlock)); } /* Allocate a node for the range tree. 
Free node with free() when finished */ @@ -356,7 +355,7 @@ int extent_tree_truncate( return 0; } - /* lock the tree for reading */ + /* lock the tree */ extent_tree_wrlock(tree); /* lookup node with the extent that has the maximum offset */ @@ -402,7 +401,7 @@ int extent_tree_truncate( tree->max = 0; } - /* done reading the tree */ + /* done updating the tree */ extent_tree_unlock(tree); return 0; @@ -462,9 +461,9 @@ struct extent_tree_node* extent_tree_iter( */ void extent_tree_rdlock(struct extent_tree* tree) { - int rc = pthread_rwlock_rdlock(&(tree->rwlock)); + int rc = ABT_rwlock_rdlock(tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_rdlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_rdlock() failed - rc=%d", rc); } } @@ -475,9 +474,9 @@ void extent_tree_rdlock(struct extent_tree* tree) */ void extent_tree_wrlock(struct extent_tree* tree) { - int rc = pthread_rwlock_wrlock(&(tree->rwlock)); + int rc = ABT_rwlock_wrlock(tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_wrlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_wrlock() failed - rc=%d", rc); } } @@ -488,9 +487,9 @@ void extent_tree_wrlock(struct extent_tree* tree) */ void extent_tree_unlock(struct extent_tree* tree) { - int rc = pthread_rwlock_unlock(&(tree->rwlock)); + int rc = ABT_rwlock_unlock(tree->rwlock); if (rc) { - LOGERR("pthread_rwlock_unlock() failed - rc=%d", rc); + LOGERR("ABT_rwlock_unlock() failed - rc=%d", rc); } } @@ -547,65 +546,6 @@ unsigned long extent_tree_max_offset(struct extent_tree* tree) return max; } -/* Given an extent tree and starting and ending logical offsets, - * fill in key/value entries that overlap that range. - * Returns at most max entries starting from lowest starting offset. 
- * Sets outnum with actual number of entries returned */ -int extent_tree_span( - struct extent_tree* tree, /* extent tree to search */ - int gfid, /* global file id we're looking in */ - unsigned long start, /* starting logical offset */ - unsigned long end, /* ending logical offset */ - int max, /* maximum number of key/vals to return */ - void* _keys, /* array of length max for output keys */ - void* _vals, /* array of length max for output values */ - int* outnum) /* number of entries returned */ -{ - unifyfs_key_t* keys = (unifyfs_key_t*) _keys; - unifyfs_val_t* vals = (unifyfs_val_t*) _vals; - - /* initialize output parameters */ - *outnum = 0; - - /* lock the tree for reading */ - extent_tree_rdlock(tree); - - int count = 0; - struct extent_tree_node* next = extent_tree_find(tree, start, end); - while ((NULL != next) && - (next->extent.start <= end) && - (count < max)) { - /* got an entry that overlaps with given range */ - - /* fill in key */ - unifyfs_key_t* key = &keys[count]; - key->gfid = gfid; - key->offset = next->extent.start; - - /* fill in value */ - unifyfs_val_t* val = &vals[count]; - val->addr = next->extent.log_pos; - val->len = next->extent.end - next->extent.start + 1; - val->delegator_rank = next->extent.svr_rank; - val->app_id = next->extent.app_id; - val->rank = next->extent.cli_id; - - /* increment the number of key/values we found */ - count++; - - /* get the next element in the tree */ - next = extent_tree_iter(tree, next); - } - - /* return to user the number of key/values we set */ - *outnum = count; - - /* done reading the tree */ - extent_tree_unlock(tree); - - return 0; -} - static void chunk_req_from_extent( unsigned long req_offset, unsigned long req_len, diff --git a/server/src/extent_tree.h b/server/src/extent_tree.h index 55f38aca4..9476c29e2 100644 --- a/server/src/extent_tree.h +++ b/server/src/extent_tree.h @@ -42,7 +42,7 @@ struct extent_tree_node { struct extent_tree { RB_HEAD(ext_tree, extent_tree_node) head; - 
pthread_rwlock_t rwlock; + ABT_rwlock rwlock; unsigned long count; /* number of segments stored in tree */ unsigned long max; /* maximum logical offset value in the tree */ }; @@ -149,20 +149,6 @@ void extent_tree_wrlock(struct extent_tree* tree); */ void extent_tree_unlock(struct extent_tree* tree); -/* given an extent tree and starting and ending logical offsets, - * fill in key/value entries that overlap that range, returns at - * most max entries starting from lowest starting offset, - * sets outnum with actual number of entries returned */ -int extent_tree_span( - struct extent_tree* tree, /* extent tree to search */ - int gfid, /* global file id we're looking in */ - unsigned long start, /* starting logical offset */ - unsigned long end, /* ending logical offset */ - int max, /* maximum number of key/vals to return */ - void* keys, /* array of length max for output keys */ - void* vals, /* array of length max for output values */ - int* outnum); /* number of entries returned */ - int extent_tree_get_chunk_list( struct extent_tree* tree, /* extent tree to search */ unsigned long offset, /* starting logical offset */ diff --git a/server/src/margo_server.c b/server/src/margo_server.c index f2b709fde..d3ce6651e 100644 --- a/server/src/margo_server.c +++ b/server/src/margo_server.c @@ -155,6 +155,12 @@ static void register_server_server_rpcs(margo_instance_id mid) bcast_progress_in_t, bcast_progress_out_t, bcast_progress_rpc); + unifyfsd_rpc_context->rpcs.bootstrap_complete_bcast_id = + MARGO_REGISTER(mid, "bootstrap_complete_bcast_rpc", + bootstrap_complete_bcast_in_t, + bootstrap_complete_bcast_out_t, + bootstrap_complete_bcast_rpc); + unifyfsd_rpc_context->rpcs.chunk_read_request_id = MARGO_REGISTER(mid, "chunk_read_request_rpc", chunk_read_request_in_t, chunk_read_request_out_t, @@ -245,6 +251,10 @@ static void register_server_server_rpcs(margo_instance_id mid) unifyfs_node_local_extents_get_in_t, unifyfs_node_local_extents_get_out_t, 
unifyfs_node_local_extents_get_rpc); + unifyfsd_rpc_context->rpcs.metaget_all_bcast_id = + MARGO_REGISTER(mid, "metaget_all_bcast_rpc", + metaget_all_bcast_in_t, metaget_all_bcast_out_t, + metaget_all_bcast_rpc); } /* setup_local_target - Initializes the client-server margo target */ @@ -530,7 +540,7 @@ int margo_connect_servers(void) /* allocate array of structs to record address for each server */ server_infos = (server_info_t*) calloc(glb_num_servers, - sizeof(server_info_t)); + sizeof(server_info_t)); if (NULL == server_infos) { LOGERR("failed to allocate server_info array"); return ENOMEM; @@ -602,7 +612,7 @@ static int forward_to_client(hg_handle_t hdl, void* input_ptr) double timeout_msec = margo_client_server_timeout_msec; hg_return_t hret = margo_forward_timed(hdl, input_ptr, timeout_msec); if (hret != HG_SUCCESS) { - LOGERR("margo_forward_timed() failed - %s", HG_Error_to_string(hret)); + LOGWARN("margo_forward_timed() failed - %s", HG_Error_to_string(hret)); return UNIFYFS_ERROR_MARGO; } return UNIFYFS_SUCCESS; @@ -633,7 +643,7 @@ int invoke_client_heartbeat_rpc(int app_id, app_id, client_id); int rc = forward_to_client(handle, &in); if (rc != UNIFYFS_SUCCESS) { - LOGERR("forward of heartbeat rpc to client failed"); + LOGINFO("forward of heartbeat rpc to client failed"); margo_destroy(handle); return rc; } diff --git a/server/src/margo_server.h b/server/src/margo_server.h index 9117d6342..3265adef8 100644 --- a/server/src/margo_server.h +++ b/server/src/margo_server.h @@ -32,6 +32,7 @@ typedef struct ServerRpcIds { /* server-server rpcs */ hg_id_t bcast_progress_id; + hg_id_t bootstrap_complete_bcast_id; hg_id_t chunk_read_request_id; hg_id_t chunk_read_response_id; hg_id_t extent_add_id; @@ -50,6 +51,7 @@ typedef struct ServerRpcIds { hg_id_t truncate_bcast_id; hg_id_t unlink_bcast_id; hg_id_t node_local_extents_get_id; + hg_id_t metaget_all_bcast_id; /* client-server rpcs */ hg_id_t client_heartbeat_id; diff --git a/server/src/unifyfs_client_rpc.c 
b/server/src/unifyfs_client_rpc.c index 07bed2a01..c6895eb7a 100644 --- a/server/src/unifyfs_client_rpc.c +++ b/server/src/unifyfs_client_rpc.c @@ -33,7 +33,6 @@ // server components #include "unifyfs_global.h" -#include "unifyfs_metadata_mdhim.h" #include "unifyfs_request_manager.h" // margo rpcs @@ -67,6 +66,7 @@ static void create_mountpoint_dir(int app_id, fattr.atime = tp; fattr.mtime = tp; fattr.ctime = tp; + fattr.last_update = tp.tv_sec; /* capture current uid and gid */ fattr.uid = getuid(); diff --git a/server/src/unifyfs_fops.h b/server/src/unifyfs_fops.h index 0c2522b28..0c5b68bd6 100644 --- a/server/src/unifyfs_fops.h +++ b/server/src/unifyfs_fops.h @@ -18,6 +18,7 @@ #include "unifyfs_configurator.h" #include "unifyfs_log.h" #include "unifyfs_meta.h" +#include "unifyfs_request_manager.h" /* * extra information that we need to pass for file operations. @@ -31,7 +32,8 @@ typedef struct _unifyfs_fops_ctx unifyfs_fops_ctx_t; typedef int (*unifyfs_fops_init_t)(unifyfs_cfg_t* cfg); -typedef int (*unifyfs_fops_fsync_t)(unifyfs_fops_ctx_t* ctx, int gfid); +typedef int (*unifyfs_fops_fsync_t)(unifyfs_fops_ctx_t* ctx, + int gfid, client_rpc_req_t* client_req); typedef int (*unifyfs_fops_filesize_t)(unifyfs_fops_ctx_t* ctx, int gfid, size_t* filesize); @@ -119,13 +121,15 @@ static inline int unifyfs_fops_filesize(unifyfs_fops_ctx_t* ctx, return global_fops_tab->filesize(ctx, gfid, filesize); } -static inline int unifyfs_fops_fsync(unifyfs_fops_ctx_t* ctx, int gfid) +static inline int unifyfs_fops_fsync(unifyfs_fops_ctx_t* ctx, + int gfid, + client_rpc_req_t* client_req) { if (!global_fops_tab->fsync) { return ENOSYS; } - return global_fops_tab->fsync(ctx, gfid); + return global_fops_tab->fsync(ctx, gfid, client_req); } static inline int unifyfs_fops_laminate(unifyfs_fops_ctx_t* ctx, int gfid) diff --git a/server/src/unifyfs_fops_mdhim.c b/server/src/unifyfs_fops_mdhim.c deleted file mode 100644 index a76a1a431..000000000 --- a/server/src/unifyfs_fops_mdhim.c +++ 
/dev/null @@ -1,1198 +0,0 @@ -/* - * Copyright (c) 2020, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2020, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -#include "unifyfs_group_rpc.h" -#include "unifyfs_metadata_mdhim.h" -#include "unifyfs_request_manager.h" - -/* given an extent corresponding to a write index, create new key/value - * pairs for that extent, splitting into multiple keys at the slice - * range boundaries (meta_slice_sz), it returns the number of - * newly created key/values inserted into the given key and value - * arrays */ -static int split_index( - unifyfs_key_t** keys, /* list to add newly created keys into */ - unifyfs_val_t** vals, /* list to add newly created values into */ - int* keylens, /* list for size of each key */ - int* vallens, /* list for size of each value */ - int gfid, /* global file id of write */ - size_t offset, /* starting byte offset of extent */ - size_t length, /* number of bytes in extent */ - size_t log_offset, /* offset within data log */ - int server_rank, /* rank of server hosting data */ - int app_id, /* app_id holding data */ - int client_rank) /* client rank holding data */ -{ - /* offset of first byte in request */ - size_t pos = offset; - - /* offset of last byte in request */ - size_t last_offset = offset + length - 1; - - /* this will track the current offset within the log - * where the data starts, we advance it with each key - * we generate depending on the data associated with - * each key */ - size_t logpos = log_offset; - - /* iterate over slice ranges and generate a start/end - * pair of keys for each */ - int count = 0; - while (pos <= last_offset) { - /* compute offset for first byte in this slice */ - size_t start = pos; - - 
/* offset for last byte in this slice, - * assume that's the last byte of the same slice - * containing start, unless that happens to be - * beyond the last byte of the actual request */ - size_t start_slice = start / meta_slice_sz; - size_t end = (start_slice + 1) * meta_slice_sz - 1; - if (end > last_offset) { - end = last_offset; - } - - /* length of extent in this slice */ - size_t len = end - start + 1; - - /* create key to describe this log entry */ - unifyfs_key_t* k = keys[count]; - k->gfid = gfid; - k->offset = start; - keylens[count] = sizeof(unifyfs_key_t); - - /* create value to store address of data */ - unifyfs_val_t* v = vals[count]; - v->addr = logpos; - v->len = len; - v->app_id = app_id; - v->rank = client_rank; - v->delegator_rank = server_rank; - vallens[count] = sizeof(unifyfs_val_t); - - /* advance to next slot in key/value arrays */ - count++; - - /* advance offset into log */ - logpos += len; - - /* advance to first byte offset of next slice */ - pos = end + 1; - } - - /* return number of keys we generated */ - return count; -} - -/* given a global file id, an offset, and a length to read from that - * file, create keys needed to query MDHIM for location of data - * corresponding to that extent, returns the number of keys inserted - * into key array provided by caller */ -static int split_request( - unifyfs_key_t** keys, /* list to add newly created keys into */ - int* keylens, /* list to add byte size of each key */ - int gfid, /* target global file id to read from */ - size_t offset, /* starting offset of read */ - size_t length) /* number of bytes to read */ -{ - /* offset of first byte in request */ - size_t pos = offset; - - /* offset of last byte in request */ - size_t last_offset = offset + length - 1; - - /* iterate over slice ranges and generate a start/end - * pair of keys for each */ - int count = 0; - while (pos <= last_offset) { - /* compute offset for first byte in this segment */ - size_t start = pos; - - /* offset for last 
byte in this segment, - * assume that's the last byte of the same segment - * containing start, unless that happens to be - * beyond the last byte of the actual request */ - size_t start_slice = start / meta_slice_sz; - size_t end = (start_slice + 1) * meta_slice_sz - 1; - if (end > last_offset) { - end = last_offset; - } - - /* create key to describe first byte we'll read - * in this slice */ - keys[count]->gfid = gfid; - keys[count]->offset = start; - keylens[count] = sizeof(unifyfs_key_t); - count++; - - /* create key to describe last byte we'll read - * in this slice */ - keys[count]->gfid = gfid; - keys[count]->offset = end; - keylens[count] = sizeof(unifyfs_key_t); - count++; - - /* advance to first byte offset of next slice */ - pos = end + 1; - } - - /* return number of keys we generated */ - return count; -} - - -static int mdhim_init(unifyfs_cfg_t* cfg) -{ - int ret = 0; - - LOGDBG("initializing file operations.."); - - ret = meta_init_store(cfg); - if (ret) { - LOGERR("failed to initialize the meta kv store (ret=%d)", ret); - } - - return ret; -} - -static int mdhim_metaget(unifyfs_fops_ctx_t* ctx, - int gfid, unifyfs_file_attr_t* attr) -{ - return unifyfs_get_file_attribute(gfid, attr); -} - -static int mdhim_metaset(unifyfs_fops_ctx_t* ctx, - int gfid, int create, unifyfs_file_attr_t* attr) -{ - return unifyfs_set_file_attribute(create, create, attr); -} - -static int mdhim_fsync(unifyfs_fops_ctx_t* ctx, int gfid) -{ - size_t i; - - /* assume we'll succeed */ - int ret = (int)UNIFYFS_SUCCESS; - - /* get memory page size on this machine */ - int page_sz = (int) get_page_size(); - - /* get application client */ - app_client* client = get_app_client(ctx->app_id, ctx->client_id); - if (NULL == client) { - return EINVAL; - } - - /* get pointer to superblock for this client and app */ - shm_context* super_ctx = client->state.shm_super_ctx; - if (NULL == super_ctx) { - LOGERR("missing client superblock"); - return UNIFYFS_FAILURE; - } - char* superblk = 
(char*)(super_ctx->addr); - - /* get pointer to start of key/value region in superblock */ - char* meta = superblk + client->state.write_index.index_offset; - - /* get number of file extent index values client has for us, - * stored as a size_t value in meta region of shared memory */ - size_t extent_num_entries = *(size_t*)(meta); - - /* indices are stored in the superblock shared memory - * created by the client, these are stored as index_t - * structs starting one page size offset into meta region */ - char* ptr_extents = meta + page_sz; - - if (extent_num_entries == 0) { - /* Nothing to do */ - return UNIFYFS_SUCCESS; - } - - unifyfs_index_t* meta_payload = (unifyfs_index_t*)(ptr_extents); - - /* total up number of key/value pairs we'll need for this - * set of index values */ - size_t slices = 0; - for (i = 0; i < extent_num_entries; i++) { - size_t offset = meta_payload[i].file_pos; - size_t length = meta_payload[i].length; - slices += meta_num_slices(offset, length); - } - if (slices >= UNIFYFS_MAX_META_SPLIT_COUNT) { - LOGERR("Error allocating buffers"); - return ENOMEM; - } - - /* pointers to memory we'll dynamically allocate for file extents */ - unifyfs_key_t** keys = NULL; - unifyfs_val_t** vals = NULL; - int* key_lens = NULL; - int* val_lens = NULL; - - /* allocate storage for file extent key/values */ - /* TODO: possibly get this from memory pool */ - keys = alloc_key_array(slices); - vals = alloc_value_array(slices); - key_lens = calloc(slices, sizeof(int)); - val_lens = calloc(slices, sizeof(int)); - if ((NULL == keys) || - (NULL == vals) || - (NULL == key_lens) || - (NULL == val_lens)) { - LOGERR("failed to allocate memory for file extents"); - ret = ENOMEM; - goto mdhim_sync_exit; - } - - /* create file extent key/values for insertion into MDHIM */ - int count = 0; - for (i = 0; i < extent_num_entries; i++) { - /* get file offset, length, and log offset for this entry */ - unifyfs_index_t* ndx = &meta_payload[i]; - assert(gfid == ndx->gfid); - 
size_t offset = ndx->file_pos; - size_t length = ndx->length; - size_t logpos = ndx->log_pos; - - /* split this entry at the offset boundaries */ - int used = split_index( - &keys[count], &vals[count], &key_lens[count], &val_lens[count], - gfid, offset, length, logpos, - glb_pmi_rank, ctx->app_id, ctx->client_id); - - /* count up the number of keys we used for this index */ - count += used; - } - - /* batch insert file extent key/values into MDHIM */ - ret = unifyfs_set_file_extents((int)count, - keys, key_lens, vals, val_lens); - if (ret != UNIFYFS_SUCCESS) { - /* TODO: need proper error handling */ - LOGERR("unifyfs_set_file_extents() failed"); - goto mdhim_sync_exit; - } - -mdhim_sync_exit: - /* clean up memory */ - if (NULL != keys) { - free_key_array(keys); - } - - if (NULL != vals) { - free_value_array(vals); - } - - if (NULL != key_lens) { - free(key_lens); - } - - if (NULL != val_lens) { - free(val_lens); - } - - return ret; -} - -static int mdhim_filesize(unifyfs_fops_ctx_t* ctx, int gfid, size_t* outsize) -{ - size_t filesize = 0; - int ret = unifyfs_invoke_filesize_rpc(gfid, &filesize); - if (ret) { - LOGERR("filesize rpc failed (ret=%d)", ret); - } else { - LOGDBG("filesize rpc returned %zu", filesize); - *outsize = filesize; - } - - unifyfs_file_attr_t attr = { 0, }; - mdhim_metaget(ctx, gfid, &attr); - - /* return greater of rpc value and mdhim metadata size */ - size_t asize = (size_t) attr.size; - if (asize > filesize) { - *outsize = asize; - } - - return ret; -} - -/* delete any key whose last byte is beyond the specified - * file size */ -static int truncate_delete_keys( - size_t filesize, /* new file size */ - int num, /* number of entries in keyvals */ - unifyfs_keyval_t* keyvals) /* list of existing key/values */ -{ - /* assume we'll succeed */ - int ret = (int) UNIFYFS_SUCCESS; - - /* pointers to memory we'll dynamically allocate for file extents */ - unifyfs_key_t** unifyfs_keys = NULL; - unifyfs_val_t** unifyfs_vals = NULL; - int* 
unifyfs_key_lens = NULL; - int* unifyfs_val_lens = NULL; - - /* in the worst case, we'll have to delete all existing keys */ - /* allocate storage for file extent key/values */ - /* TODO: possibly get this from memory pool */ - unifyfs_keys = alloc_key_array(num); - unifyfs_vals = alloc_value_array(num); - unifyfs_key_lens = calloc(num, sizeof(int)); - unifyfs_val_lens = calloc(num, sizeof(int)); - if ((NULL == unifyfs_keys) || - (NULL == unifyfs_vals) || - (NULL == unifyfs_key_lens) || - (NULL == unifyfs_val_lens)) { - LOGERR("failed to allocate memory for file extents"); - ret = ENOMEM; - goto truncate_delete_exit; - } - - /* counter for number of key/values we need to delete */ - int delete_count = 0; - - /* iterate over each key, and if this index extends beyond desired - * file size, create an entry to delete that key */ - int i; - for (i = 0; i < num; i++) { - /* get pointer to next key value pair */ - unifyfs_keyval_t* kv = &keyvals[i]; - - /* get last byte offset for this segment of the file */ - size_t last_offset = kv->key.offset + kv->val.len; - - /* if this segment extends beyond the new file size, - * we need to delete this index entry */ - if (last_offset > filesize) { - /* found an index that extends past end of desired - * file size, get next empty key entry from the pool */ - unifyfs_key_t* key = unifyfs_keys[delete_count]; - - /* define the key to be deleted */ - key->gfid = kv->key.gfid; - key->offset = kv->key.offset; - - /* MDHIM needs to know the byte size of each key and value */ - unifyfs_key_lens[delete_count] = sizeof(unifyfs_key_t); - //unifyfs_val_lens[delete_count] = sizeof(unifyfs_val_t); - - /* increment the number of keys we're deleting */ - delete_count++; - } - } - - /* batch delete file extent key/values from MDHIM */ - if (delete_count > 0) { - ret = unifyfs_delete_file_extents(delete_count, - unifyfs_keys, unifyfs_key_lens); - if (ret != UNIFYFS_SUCCESS) { - /* TODO: need proper error handling */ - 
LOGERR("unifyfs_delete_file_extents() failed"); - goto truncate_delete_exit; - } - } - -truncate_delete_exit: - /* clean up memory */ - - if (NULL != unifyfs_keys) { - free_key_array(unifyfs_keys); - } - - if (NULL != unifyfs_vals) { - free_value_array(unifyfs_vals); - } - - if (NULL != unifyfs_key_lens) { - free(unifyfs_key_lens); - } - - if (NULL != unifyfs_val_lens) { - free(unifyfs_val_lens); - } - - return ret; -} - -/* rewrite any key that overlaps with new file size, - * we assume the existing key has already been deleted */ -static int truncate_rewrite_keys( - size_t filesize, /* new file size */ - int num, /* number of entries in keyvals */ - unifyfs_keyval_t* keyvals) /* list of existing key/values */ -{ - /* assume we'll succeed */ - int ret = (int) UNIFYFS_SUCCESS; - - /* pointers to memory we'll dynamically allocate for file extents */ - unifyfs_key_t** unifyfs_keys = NULL; - unifyfs_val_t** unifyfs_vals = NULL; - int* unifyfs_key_lens = NULL; - int* unifyfs_val_lens = NULL; - - /* in the worst case, we'll have to rewrite all existing keys */ - /* allocate storage for file extent key/values */ - /* TODO: possibly get this from memory pool */ - unifyfs_keys = alloc_key_array(num); - unifyfs_vals = alloc_value_array(num); - unifyfs_key_lens = calloc(num, sizeof(int)); - unifyfs_val_lens = calloc(num, sizeof(int)); - if ((NULL == unifyfs_keys) || - (NULL == unifyfs_vals) || - (NULL == unifyfs_key_lens) || - (NULL == unifyfs_val_lens)) { - LOGERR("failed to allocate memory for file extents"); - ret = ENOMEM; - goto truncate_rewrite_exit; - } - - /* counter for number of key/values we need to rewrite */ - int count = 0; - - /* iterate over each key, and if this index starts before - * and ends after the desired file size, create an entry - * that ends at new file size */ - int i; - for (i = 0; i < num; i++) { - /* get pointer to next key value pair */ - unifyfs_keyval_t* kv = &keyvals[i]; - - /* get first byte offset for this segment of the file */ - size_t 
first_offset = kv->key.offset; - - /* get last byte offset for this segment of the file */ - size_t last_offset = kv->key.offset + kv->val.len; - - /* if this segment extends beyond the new file size, - * we need to rewrite this index entry */ - if (first_offset < filesize && - last_offset > filesize) { - /* found an index that overlaps end of desired - * file size, get next empty key entry from the pool */ - unifyfs_key_t* key = unifyfs_keys[count]; - - /* define the key to be rewritten */ - key->gfid = kv->key.gfid; - key->offset = kv->key.offset; - - /* compute new length of this entry */ - size_t newlen = (size_t)(filesize - first_offset); - - /* for the value, we store the log position, the length, - * the host server (delegator rank), the mount point id - * (app id), and the client id (rank) */ - unifyfs_val_t* val = unifyfs_vals[count]; - val->addr = kv->val.addr; - val->len = newlen; - val->delegator_rank = kv->val.delegator_rank; - val->app_id = kv->val.app_id; - val->rank = kv->val.rank; - - /* MDHIM needs to know the byte size of each key and value */ - unifyfs_key_lens[count] = sizeof(unifyfs_key_t); - unifyfs_val_lens[count] = sizeof(unifyfs_val_t); - - /* increment the number of keys we're deleting */ - count++; - } - } - - /* batch set file extent key/values from MDHIM */ - if (count > 0) { - ret = unifyfs_set_file_extents(count, - unifyfs_keys, unifyfs_key_lens, - unifyfs_vals, unifyfs_val_lens); - if (ret != UNIFYFS_SUCCESS) { - /* TODO: need proper error handling */ - LOGERR("unifyfs_set_file_extents() failed"); - goto truncate_rewrite_exit; - } - } - -truncate_rewrite_exit: - /* clean up memory */ - - if (NULL != unifyfs_keys) { - free_key_array(unifyfs_keys); - } - - if (NULL != unifyfs_vals) { - free_value_array(unifyfs_vals); - } - - if (NULL != unifyfs_key_lens) { - free(unifyfs_key_lens); - } - - if (NULL != unifyfs_val_lens) { - free(unifyfs_val_lens); - } - - return ret; -} - -static int mdhim_truncate(unifyfs_fops_ctx_t* ctx, int gfid, 
off_t len) -{ - size_t newsize = (size_t) len; - - /* set offset and length to request *all* key/value pairs - * for this file */ - size_t offset = 0; - - /* want to pick the highest integer offset value a file - * could have here */ - size_t length = (SIZE_MAX >> 1) - 1; - - /* get the locations of all the read requests from the - * key-value store*/ - unifyfs_key_t key1, key2; - - /* create key to describe first byte we'll read */ - key1.gfid = gfid; - key1.offset = offset; - - /* create key to describe last byte we'll read */ - key2.gfid = gfid; - key2.offset = offset + length - 1; - - /* set up input params to specify range lookup */ - unifyfs_key_t* unifyfs_keys[2] = {&key1, &key2}; - int key_lens[2] = {sizeof(unifyfs_key_t), sizeof(unifyfs_key_t)}; - - /* look up all entries in this range */ - int num_vals = 0; - unifyfs_keyval_t* keyvals = NULL; - int rc = unifyfs_get_file_extents(2, unifyfs_keys, key_lens, - &num_vals, &keyvals); - if (UNIFYFS_SUCCESS != rc) { - /* failed to look up extents, bail with error */ - return UNIFYFS_FAILURE; - } - - /* compute our file size by iterating over each file - * segment and taking the max logical offset */ - int i; - size_t filesize = 0; - for (i = 0; i < num_vals; i++) { - /* get pointer to next key value pair */ - unifyfs_keyval_t* kv = &keyvals[i]; - - /* get last byte offset for this segment of the file */ - size_t last_offset = kv->key.offset + kv->val.len; - - /* update our filesize if this offset is bigger than the current max */ - if (last_offset > filesize) { - filesize = last_offset; - } - } - - /* get filesize as recorded in metadata, which may be bigger if - * user issued an ftruncate on the file to extend it past the - * last write */ - size_t filesize_meta = filesize; - - /* given the global file id, look up file attributes - * from key/value store */ - unifyfs_file_attr_t fattr; - rc = unifyfs_get_file_attribute(gfid, &fattr); - if (rc == UNIFYFS_SUCCESS) { - /* found file attribute for this file, now get 
its size */ - filesize_meta = fattr.size; - } else { - /* failed to find file attributes for this file */ - goto truncate_exit; - } - - /* take maximum of last write and file size from metadata */ - if (filesize_meta > filesize) { - filesize = filesize_meta; - } - - /* may need to throw away and rewrite keys if shrinking file */ - if (newsize < filesize) { - /* delete any key that extends beyond new file size */ - rc = truncate_delete_keys(newsize, num_vals, keyvals); - if (rc != UNIFYFS_SUCCESS) { - goto truncate_exit; - } - - /* rewrite any key that overlaps new file size */ - rc = truncate_rewrite_keys(newsize, num_vals, keyvals); - if (rc != UNIFYFS_SUCCESS) { - goto truncate_exit; - } - } - - /* update file size field with latest size */ - fattr.size = newsize; - rc = unifyfs_set_file_attribute(1, 0, &fattr); - if (rc != UNIFYFS_SUCCESS) { - /* failed to update file attributes with new file size */ - goto truncate_exit; - } - - rc = unifyfs_invoke_truncate_rpc(gfid, newsize); - if (rc) { - LOGERR("truncate rpc failed"); - } - -truncate_exit: - - /* free off key/value buffer returned from get_file_extents */ - if (NULL != keyvals) { - free(keyvals); - keyvals = NULL; - } - - return rc; -} - -static int mdhim_laminate(unifyfs_fops_ctx_t* ctx, int gfid) -{ - int rc = UNIFYFS_SUCCESS; - - /* given the global file id, look up file attributes - * from key/value store */ - unifyfs_file_attr_t attr = { 0, }; - int ret = mdhim_metaget(ctx, gfid, &attr); - if (ret != UNIFYFS_SUCCESS) { - /* failed to find attributes for the file */ - return ret; - } - - /* if item is not a file, bail with error */ - mode_t mode = (mode_t) attr.mode; - if ((mode & S_IFMT) != S_IFREG) { - /* item is not a regular file */ - LOGERR("ERROR: only regular files can be laminated (gfid=%d)", gfid); - return EINVAL; - } - - /* lookup current file size */ - size_t filesize; - ret = mdhim_filesize(ctx, gfid, &filesize); - if (ret != UNIFYFS_SUCCESS) { - /* failed to get file size for file */ - 
LOGERR("lamination file size calculation failed (gfid=%d)", gfid); - return ret; - } - - /* update fields in metadata */ - attr.size = filesize; - attr.is_laminated = 1; - - /* update metadata, set size and laminate */ - rc = unifyfs_set_file_attribute(1, 1, &attr); - if (rc != UNIFYFS_SUCCESS) { - LOGERR("lamination metadata update failed (gfid=%d)", gfid); - } - - return rc; -} - -static int mdhim_unlink(unifyfs_fops_ctx_t* ctx, int gfid) -{ - int rc = UNIFYFS_SUCCESS; - - /* given the global file id, look up file attributes - * from key/value store */ - unifyfs_file_attr_t attr; - int ret = unifyfs_get_file_attribute(gfid, &attr); - if (ret != UNIFYFS_SUCCESS) { - /* failed to find attributes for the file */ - return ret; - } - - /* if item is a file, call truncate to free space */ - mode_t mode = (mode_t) attr.mode; - if ((mode & S_IFMT) == S_IFREG) { - /* item is regular file, truncate to 0 */ - ret = mdhim_truncate(ctx, gfid, 0); - if (ret != UNIFYFS_SUCCESS) { - /* failed to delete write extents for file, - * let's leave the file attributes in place */ - return ret; - } - } - - /* delete metadata */ - ret = unifyfs_delete_file_attribute(gfid); - if (ret != UNIFYFS_SUCCESS) { - rc = ret; - } - - rc = unifyfs_invoke_unlink_rpc(gfid); - if (rc) { - LOGERR("unlink rpc failed (ret=%d)", rc); - } - - return rc; -} - - -/* given a set of input key pairs, where each pair describes the first - * and last byte offset of a data range, refer to our local extent map - * and generate keyval responses for any ranges covering data that is - * local to the server, generate new key pairs to describe remaining - * holes that will be queried against the global key/value store, - * the list of output keys, key lengths, and keyvals are allocated - * and returned to be freed by the caller */ -static int get_local_keyvals( - int num_keys, /* number of input keys */ - unifyfs_key_t** keys, /* list of input keys */ - int* keylens, /* list of input key lengths */ - int* out_global, /* 
number of output keys for server */ - unifyfs_key_t*** out_keys, /* list of output keys */ - int** out_keylens, /* list of output key lengths */ - int* num_keyvals, /* number of output keyvals from local data */ - unifyfs_keyval_t** keyvals) /* list of output keyvals */ -{ - /* initialize output parameters */ - *out_global = 0; - *out_keys = NULL; - *out_keylens = NULL; - *num_keyvals = 0; - *keyvals = NULL; - - /* allocate memory to copy key/value data */ - int max_keyvals = UNIFYFS_MAX_META_SPLIT_COUNT; - unifyfs_keyval_t* kvs_local = (unifyfs_keyval_t*) calloc( - max_keyvals, sizeof(unifyfs_keyval_t)); - if (NULL == kvs_local) { - LOGERR("failed to allocate keyvals"); - return (int)UNIFYFS_ERROR_MDHIM; - } - - /* allocate memory to define remaining keys to - * search in global store */ - unifyfs_key_t** keys_global = alloc_key_array(max_keyvals); - if (NULL == keys_global) { - LOGERR("failed to allocate keys"); - free(kvs_local); - return (int)UNIFYFS_ERROR_MDHIM; - } - - /* allocate memory to define key lengths for remaining keys to - * search in global store */ - int* keylens_global = (int*) calloc(max_keyvals, sizeof(int)); - if (NULL == keylens_global) { - LOGERR("failed to allocate keylens"); - free_key_array(keys_global); - free(kvs_local); - return (int)UNIFYFS_ERROR_MDHIM; - } - - /* counters for the number of local keyvals we create and the - * number of keys we generate for the global key/value store */ - int count_global = 0; - int count_local = 0; - - int i; - for (i = 0; i < num_keys; i += 2) { - /* get next key pair that describe start and end offsets */ - unifyfs_key_t* k1 = keys[i+0]; - unifyfs_key_t* k2 = keys[i+1]; - - /* get gfid, start, and end offset of this pair */ - int gfid = k1->gfid; - size_t start = k1->offset; - size_t end = k2->offset; - - /* we'll define key/values in these temp arrays that correspond - * to extents we have locally */ - unifyfs_key_t tmpkeys[UNIFYFS_MAX_META_SPLIT_COUNT]; - unifyfs_val_t 
tmpvals[UNIFYFS_MAX_META_SPLIT_COUNT]; - - /* look up any entries we can find in our local extent map */ - int num_local = 0; - int ret = unifyfs_inode_span_extents(gfid, start, end, - UNIFYFS_MAX_META_SPLIT_COUNT, - tmpkeys, tmpvals, &num_local); - if (ret) { - LOGERR("failed to span extents (gfid=%d)", gfid); - // now what? - } - - /* iterate over local keys, create new keys to pass to server - * for any holes in our local extents */ - int j; - size_t nextstart = start; - for (j = 0; j < num_local; j++) { - /* get next key/value returned from local extent */ - unifyfs_key_t* k = &tmpkeys[j]; - unifyfs_val_t* v = &tmpvals[j]; - - /* if we have a gap in our data, - * we need to ask the global key/value store */ - if (nextstart < k->offset) { - /* we're missing a section of bytes, so create a key - * pair to search for this hole in the global key/value - * store */ - - /* check that we don't overflow the global array */ - if (count_global + 2 > max_keyvals) { - /* exhausted our space */ - free(keylens_global); - free_key_array(keys_global); - free(kvs_local); - return ENOMEM; - } - - /* first key is for starting offset of the hole, - * which is defined in next start */ - unifyfs_key_t* gk1 = keys_global[count_global]; - gk1->gfid = gfid; - gk1->offset = nextstart; - keylens_global[count_global] = sizeof(unifyfs_key_t); - count_global++; - - /* second key is for ending offset of the hole, - * which will be the offset of the byte that comes - * just before the offset of the current key */ - unifyfs_key_t* gk2 = keys_global[count_global]; - gk2->gfid = gfid; - gk2->offset = k->offset - 1; - keylens_global[count_global] = sizeof(unifyfs_key_t); - count_global++; - } else { - /* otherwise we have a local extent that matches, - * copy the corresponding key/value pair into the - * local output array */ - - /* check that we don't overflow the local array */ - if (count_local + 1 > max_keyvals) { - /* exhausted our space */ - free(keylens_global); - 
free_key_array(keys_global); - free(kvs_local); - return ENOMEM; - } - - /* create a key/value describing the - * current local extent */ - - /* get pointer to next key/val */ - unifyfs_keyval_t* kv = &kvs_local[count_local]; - - /* copy in the key and value generated from the call - * to tree_span into our array of local key/value pairs */ - memcpy(&kv->key, k, sizeof(unifyfs_key_t)); - memcpy(&kv->val, v, sizeof(unifyfs_val_t)); - - /* increase the number of keyvals we've found locally */ - count_local++; - } - - /* advance to start of next segment we're looking for */ - nextstart = k->offset + v->len; - } - - /* verify that we covered the full range, create a key pair - * to look in the global key/value store for any trailing hole */ - if (nextstart <= end) { - /* check that we don't overflow the global array */ - if (count_global + 2 > max_keyvals) { - /* exhausted our space */ - free(keylens_global); - free_key_array(keys_global); - free(kvs_local); - return ENOMEM; - } - - /* first key is for starting offset of the hole, - * which is defined in next start */ - unifyfs_key_t* gk1 = keys_global[count_global]; - gk1->gfid = gfid; - gk1->offset = nextstart; - keylens_global[count_global] = sizeof(unifyfs_key_t); - count_global++; - - /* second key is for ending offset of the hole */ - unifyfs_key_t* gk2 = keys_global[count_global]; - gk2->gfid = gfid; - gk2->offset = end; - keylens_global[count_global] = sizeof(unifyfs_key_t); - count_global++; - } - } - - /* set output values */ - *out_global = count_global; - *out_keys = keys_global; - *out_keylens = keylens_global; - *num_keyvals = count_local; - *keyvals = kvs_local; - - return UNIFYFS_SUCCESS; -} - -static int create_gfid_chunk_reads(reqmgr_thrd_t* thrd_ctrl, int gfid, - int app_id, int client_id, int num_keys, - unifyfs_key_t** keys, int* keylens) -{ - int rc = UNIFYFS_SUCCESS; - - int num_vals = 0; - unifyfs_keyval_t* keyvals = NULL; - - /* not using our local extent map, - * lookup all keys from global 
key/value store */ - rc = unifyfs_get_file_extents(num_keys, keys, keylens, - &num_vals, &keyvals); - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to lookup keyvals from global key/val store"); - return rc; - } - - /* this is to maintain limits imposed in previous code - * that would throw fatal errors */ - if (num_vals >= UNIFYFS_MAX_META_SPLIT_COUNT || - num_vals >= UNIFYFS_MAX_META_PER_SEND) { - LOGERR("too many key/values returned in range lookup"); - if (NULL != keyvals) { - free(keyvals); - keyvals = NULL; - } - return ENOMEM; - } - - if (UNIFYFS_SUCCESS != rc) { - /* failed to find any key / value pairs */ - rc = UNIFYFS_FAILURE; - } else { - /* if we get more than one write index entry - * sort them by file id and then by delegator rank */ - if (num_vals > 1) { - qsort(keyvals, (size_t)num_vals, sizeof(unifyfs_keyval_t), - unifyfs_keyval_compare); - } - - server_read_req_t* rdreq = rm_reserve_read_req(thrd_ctrl); - if (NULL == rdreq) { - rc = UNIFYFS_FAILURE; - } else { - rdreq->app_id = app_id; - rdreq->client_id = client_id; - /* TODO: rdreq->extent was removed - * rdreq->extent.gfid = gfid; - * rdreq->extent.errcode = EINPROGRESS; - */ - rc = rm_create_chunk_requests(thrd_ctrl, rdreq, - num_vals, keyvals); - if (rc != (int)UNIFYFS_SUCCESS) { - rm_release_read_req(thrd_ctrl, rdreq); - } - } - } - - /* free off key/value buffer returned from get_file_extents */ - if (NULL != keyvals) { - free(keyvals); - keyvals = NULL; - } - - return rc; -} - -static int mdhim_read(unifyfs_fops_ctx_t* ctx, - int gfid, off_t offset, size_t length) -{ - /* get application client */ - int app_id = ctx->app_id; - int client_id = ctx->client_id; - app_client* client = get_app_client(app_id, client_id); - if (NULL == client) { - return (int)UNIFYFS_FAILURE; - } - - /* get thread control structure */ - reqmgr_thrd_t* thrd_ctrl = client->reqmgr; - - /* get chunks corresponding to requested client read extent - * - * Generate a pair of keys for the read request, representing the 
start - * and end offset. MDHIM returns all key-value pairs that fall within - * the offset range. - * - * TODO: this is specific to the MDHIM in the source tree and not portable - * to other KV-stores. This needs to be revisited to utilize some - * other mechanism to retrieve all relevant key-value pairs from the - * KV-store. - */ - - /* count number of slices this range covers */ - size_t slices = meta_num_slices(offset, length); - if (slices >= UNIFYFS_MAX_META_SPLIT_COUNT) { - LOGERR("Error allocating buffers"); - return ENOMEM; - } - - /* allocate key storage */ - size_t key_cnt = slices * 2; - unifyfs_key_t** keys = alloc_key_array(key_cnt); - int* key_lens = (int*) calloc(key_cnt, sizeof(int)); - if ((NULL == keys) || - (NULL == key_lens)) { - // this is a fatal error - // TODO: we need better error handling - LOGERR("Error allocating buffers"); - return ENOMEM; - } - - /* split range of read request at boundaries used for - * MDHIM range query */ - split_request(keys, key_lens, gfid, offset, length); - - /* queue up the read operations */ - int rc = create_gfid_chunk_reads(thrd_ctrl, gfid, app_id, client_id, - key_cnt, keys, key_lens); - - /* free memory allocated for key storage */ - free_key_array(keys); - free(key_lens); - - return rc; -} - -static int mdhim_mread(unifyfs_fops_ctx_t* ctx, size_t num_req, void* reqbuf) -{ - int rc = UNIFYFS_SUCCESS; - int app_id = ctx->app_id; - int client_id = ctx->client_id; - unifyfs_extent_t* req; - unifyfs_extent_t* reqs = (unifyfs_extent_t*)reqbuf; - - /* get application client */ - app_client* client = get_app_client(app_id, client_id); - if (NULL == client) { - return (int)UNIFYFS_FAILURE; - } - - /* get thread control structure */ - reqmgr_thrd_t* thrd_ctrl = client->reqmgr; - - /* count up number of slices these request cover */ - int i; - size_t slices = 0; - for (i = 0; i < num_req; i++) { - req = reqs + i; - - /* get offset and length of next request */ - size_t off = req->offset; - size_t len = req->length; 
- - /* add in number of slices this request needs */ - slices += meta_num_slices(off, len); - } - if (slices >= UNIFYFS_MAX_META_SPLIT_COUNT) { - LOGERR("Error allocating buffers"); - return ENOMEM; - } - - /* allocate key storage */ - size_t key_cnt = slices * 2; - unifyfs_key_t** keys = alloc_key_array(key_cnt); - int* key_lens = (int*) calloc(key_cnt, sizeof(int)); - if ((NULL == keys) || - (NULL == key_lens)) { - // this is a fatal error - // TODO: we need better error handling - LOGERR("Error allocating buffers"); - return ENOMEM; - } - - /* get chunks corresponding to requested client read extents */ - int ret; - int num_keys = 0; - int last_gfid = -1; - for (i = 0; i < num_req; i++) { - req = reqs + i; - - /* get the file id for this request */ - int gfid = req->gfid; - - /* if we have switched to a different file, create chunk reads - * for the previous file */ - if (i && (gfid != last_gfid)) { - /* create requests for all extents of last_gfid */ - ret = create_gfid_chunk_reads(thrd_ctrl, last_gfid, - app_id, client_id, - num_keys, keys, key_lens); - if (ret != UNIFYFS_SUCCESS) { - LOGERR("Error creating chunk reads for gfid=%d", last_gfid); - rc = ret; - } - - /* reset key counter for the current gfid */ - num_keys = 0; - } - - /* get offset and length of current read request */ - size_t off = req->offset; - size_t len = req->length; - LOGDBG("gfid:%d, offset:%zu, length:%zu", gfid, off, len); - - /* Generate a pair of keys for each read request, representing - * the start and end offsets. MDHIM returns all key-value pairs that - * fall within the offset range. - * - * TODO: this is specific to the MDHIM in the source tree and not - * portable to other KV-stores. This needs to be revisited to - * utilize some other mechanism to retrieve all relevant KV - * pairs from the KV-store. 
- */ - - /* split range of read request at boundaries used for - * MDHIM range query */ - int used = split_request(&keys[num_keys], &key_lens[num_keys], - gfid, off, len); - num_keys += used; - - /* keep track of the last gfid value that we processed */ - last_gfid = gfid; - } - - /* create requests for all extents of final gfid */ - ret = create_gfid_chunk_reads(thrd_ctrl, last_gfid, - app_id, client_id, - num_keys, keys, key_lens); - if (ret != UNIFYFS_SUCCESS) { - LOGERR("Error creating chunk reads for gfid=%d", last_gfid); - rc = ret; - } - - /* free memory allocated for key storage */ - free_key_array(keys); - free(key_lens); - - return rc; -} - -static struct unifyfs_fops _fops_mdhim = { - .name = "mdhim", - .init = mdhim_init, - .metaget = mdhim_metaget, - .metaset = mdhim_metaset, - .fsync = mdhim_fsync, - .filesize = mdhim_filesize, - .truncate = mdhim_truncate, - .laminate = mdhim_laminate, - .unlink = mdhim_unlink, - .read = mdhim_read, - .mread = mdhim_mread, -}; - -struct unifyfs_fops* unifyfs_fops_impl = &_fops_mdhim; - diff --git a/server/src/unifyfs_fops_rpc.c b/server/src/unifyfs_fops_rpc.c index d42b13cac..a7889bf9e 100644 --- a/server/src/unifyfs_fops_rpc.c +++ b/server/src/unifyfs_fops_rpc.c @@ -35,6 +35,10 @@ int rpc_metaget(unifyfs_fops_ctx_t* ctx, int gfid, unifyfs_file_attr_t* attr) { + if (gfid == ctx->app_id) { + /* should always have a local copy of mountpoint attrs */ + return sm_get_fileattr(gfid, attr); + } return unifyfs_invoke_metaget_rpc(gfid, attr); } @@ -52,7 +56,8 @@ int rpc_metaset(unifyfs_fops_ctx_t* ctx, */ static int rpc_fsync(unifyfs_fops_ctx_t* ctx, - int gfid) + int gfid, + client_rpc_req_t* client_req) { size_t i; @@ -86,9 +91,11 @@ int rpc_fsync(unifyfs_fops_ctx_t* ctx, /* the sync rpc now contains extents from a single file/gfid */ assert(gfid == index_entry[0].gfid); + server_rpc_req_t* svr_req = malloc(sizeof(*svr_req)); + int* pending_gfid = malloc(sizeof(int)); extent_metadata* extents = calloc(num_extents, 
sizeof(*extents)); - if (NULL == extents) { - LOGERR("failed to allocate memory for local_extents"); + if ((NULL == svr_req) || (NULL == pending_gfid) || (NULL == extents)) { + LOGERR("failed to allocate memory for local extents sync"); return ENOMEM; } @@ -104,20 +111,25 @@ int rpc_fsync(unifyfs_fops_ctx_t* ctx, } /* update local inode state first */ - ret = unifyfs_inode_add_extents(gfid, num_extents, extents); + ret = unifyfs_inode_add_pending_extents(gfid, client_req, + num_extents, extents); if (ret) { - LOGERR("failed to add local extents (gfid=%d, ret=%d)", gfid, ret); + LOGERR("failed to add pending local extents (gfid=%d, ret=%d)", + gfid, ret); + free(pending_gfid); + free(svr_req); return ret; + } else { + /* then ask svcmgr to process the pending extent sync(s) */ + *pending_gfid = gfid; + svr_req->req_type = UNIFYFS_SERVER_PENDING_SYNC; + svr_req->handle = HG_HANDLE_NULL; + svr_req->input = (void*) pending_gfid; + svr_req->bulk_buf = NULL; + svr_req->bulk_sz = 0; + ret = sm_submit_service_request(svr_req); } - /* then update owner inode state */ - ret = unifyfs_invoke_add_extents_rpc(gfid, num_extents, extents); - if (ret) { - LOGERR("failed to add extents (gfid=%d, ret=%d)", gfid, ret); - } - - free(extents); - return ret; } diff --git a/server/src/unifyfs_global.h b/server/src/unifyfs_global.h index 1f56705bb..677e80f60 100644 --- a/server/src/unifyfs_global.h +++ b/server/src/unifyfs_global.h @@ -46,6 +46,7 @@ // common headers #include "arraylist.h" +#include "compare_fn.h" #include "tree.h" #include "unifyfs_client.h" #include "unifyfs_const.h" @@ -141,7 +142,6 @@ typedef struct { struct reqmgr_thrd; - /** * Structure to maintain application client state, including * logio and shared memory contexts, margo rpc address, etc. 
@@ -200,8 +200,20 @@ unifyfs_rc cleanup_app_client(app_config* app, app_client* clnt); unifyfs_rc add_failed_client(int app_id, int client_id); -/* publish the pids of all servers to a shared file */ -int unifyfs_publish_server_pids(void); +/* methods for pending remote metaget() bookkeeping */ +unifyfs_rc add_pending_metaget(int gfid); + +bool check_pending_metaget(int gfid); + +unifyfs_rc clear_pending_metaget(int gfid); + + + +/* notify local server main thread that bootstrap is complete */ +int unifyfs_signal_bootstrap_complete(void); + +/* participate in collective server bootstrap completion process */ +int unifyfs_complete_bootstrap(void); /* report the pid for a server with given rank */ int unifyfs_report_server_pid(int rank, int pid); diff --git a/server/src/unifyfs_group_rpc.c b/server/src/unifyfs_group_rpc.c index a8dbd6d4e..41840d85f 100644 --- a/server/src/unifyfs_group_rpc.c +++ b/server/src/unifyfs_group_rpc.c @@ -66,6 +66,215 @@ static int forward_child_request(void* input_ptr, return ret; } +/* Helper function for the UNIFYFS_SERVER_BCAST_RPC_METAGET case in + * get_child_response(). Handles merging the output from a child + * with that from the parent. */ +static int merge_metaget_all_bcast_outputs( + metaget_all_bcast_out_t* p_out, + metaget_all_bcast_out_t* c_out, + hg_handle_t p_hdl, + hg_handle_t c_hdl) +{ + int ret = UNIFYFS_SUCCESS; + hg_return_t hret = HG_SUCCESS; + hg_return_t bulk_create_hret = HG_SUCCESS; + + int32_t parent_num_files = p_out->num_files; + int32_t child_num_files = c_out->num_files; + hg_size_t child_buf_size = child_num_files * sizeof(unifyfs_file_attr_t); + + /* Quick optimization: If there's no files for the child or + * parent, we can exit now. (In fact, trying to perform a bulk transfer + * of size 0 will fail, so if we don't bail now, we'd just have to have + * more checks for this case down below.) 
*/ + if ((0 == parent_num_files) && (0 == child_num_files)) { + return UNIFYFS_SUCCESS; + } + + // Some variables we'll need for the bulk transfer(s) + unifyfs_file_attr_t* child_attr_list = NULL; + hg_bulk_t local_bulk; + const struct hg_info* info = NULL; + hg_addr_t server_addr; + margo_instance_id mid; + + // If the number of child files is 0, don't bother with the bulk + // transfer - it will just fail with HG_INVALID_ARG + if (child_num_files) { + + // Pull the bulk data (the list of file_attr structs) over + child_attr_list = calloc(child_num_files, sizeof(unifyfs_file_attr_t)); + if (!child_attr_list) { + return ENOMEM; + } + + // Figure out some margo-specific info that we need for the transfer + info = margo_get_info(c_hdl); + server_addr = info->addr; + mid = margo_hg_handle_get_instance(c_hdl); + + hg_size_t segment_sizes[1] = { child_buf_size }; + void* segment_ptrs[1] = { (void*)child_attr_list }; + bulk_create_hret = + margo_bulk_create(mid, 1, segment_ptrs, segment_sizes, + HG_BULK_WRITE_ONLY, &local_bulk); + if (HG_SUCCESS != bulk_create_hret) { + LOGERR("margo_bulk_create() failed - %s", + HG_Error_to_string(bulk_create_hret)); + free(child_attr_list); + return UNIFYFS_ERROR_MARGO; + } + + hret = margo_bulk_transfer(mid, HG_BULK_PULL, server_addr, + c_out->file_meta, 0, local_bulk, 0, + child_buf_size); + if (HG_SUCCESS != hret) { + LOGERR("margo_bulk_transfer() failed - %s", + HG_Error_to_string(hret)); + margo_bulk_free(local_bulk); + free(child_attr_list); + return UNIFYFS_ERROR_MARGO; + } + + margo_bulk_free(local_bulk); + } + + /* OK, file attrs from the child (assuming there were any files) are now + * stored in child_attr_list. And if there were 0 files, then + * child_attr_list is NULL. 
*/ + + // Now get the bulk data from the parent (assuming there is any) + unifyfs_file_attr_t* parent_attr_list = NULL; + if (parent_num_files + child_num_files > 0) { + parent_attr_list = calloc(parent_num_files + child_num_files, + sizeof(unifyfs_file_attr_t)); + /* Note: Deliberately allocating enough space for the child file attrs, + * since we're going to be copying them in anyway. + * Also, we had to check to see if there actually was any need to + * allocate memory because if you pass 0 into calloc(), you'll likely + * get a NULL back, and that would confuse the error checking on the + * next lines. */ + if (!parent_attr_list) { + free(child_attr_list); + return ENOMEM; + } + } + + if (parent_num_files) { + hg_size_t parent_buf_size = + parent_num_files * sizeof(unifyfs_file_attr_t); + + // Figure out some margo-specific info that we need for the transfer + info = margo_get_info(p_hdl); + server_addr = info->addr; + // address of the bulk data on the server side + mid = margo_hg_handle_get_instance(p_hdl); + + hg_size_t segment_sizes[1] = { parent_buf_size }; + void* segment_ptrs[1] = { (void*)parent_attr_list }; + bulk_create_hret = + margo_bulk_create(mid, 1, segment_ptrs, segment_sizes, + HG_BULK_WRITE_ONLY, &local_bulk); + + if (HG_SUCCESS != bulk_create_hret) { + LOGERR("margo_bulk_create() failed - %s", + HG_Error_to_string(bulk_create_hret)); + free(parent_attr_list); + free(child_attr_list); + return UNIFYFS_ERROR_MARGO; + } + + /* It would be nice if we didn't have to actually do a margo transfer + * here. The data we need exists in our current address space + * somewhere. Unfortunately, we don't know where because that's + * hidden from us by Margo. The best we can do is hope that Margo is + * optimized for this case and this transfer ends up just being a + * mem copy. 
*/ + hret = margo_bulk_transfer(mid, HG_BULK_PULL, server_addr, + p_out->file_meta, 0, local_bulk, 0, + parent_buf_size); + if (HG_SUCCESS != hret) { + LOGERR("margo_bulk_transfer() failed - %s", + HG_Error_to_string(hret)); + margo_bulk_free(local_bulk); + free(parent_attr_list); + free(child_attr_list); + return UNIFYFS_ERROR_MARGO; + } + margo_bulk_free(local_bulk); + } + + /* OK, file attrs from the parent (assuming there were any files) are now + * stored in parent_attr_list. And parent_attr_list is actually big + * enough to hold all the file attrs from the parent and child. + * + * The next step is to append the child filenames string to the parent's, + * and update the string offsets stored in the child file attrs' filename + * members. */ + + uint64_t parent_filenames_len = + p_out->filenames ? strlen(p_out->filenames) : 0; + uint64_t child_filenames_len = + c_out->filenames ? strlen(c_out->filenames) : 0; + + char* new_filenames = calloc(parent_filenames_len+child_filenames_len+1, + sizeof(char)); + if (!new_filenames) { + free(parent_attr_list); + free(child_attr_list); + return ENOMEM; + } + + if (p_out->filenames) { + strcpy(new_filenames, p_out->filenames); + } + if (c_out->filenames) { + strcat(new_filenames, c_out->filenames); + } + free(p_out->filenames); + p_out->filenames = new_filenames; + + // Now update all the offset values in the child_attr_list + for (unsigned int i = 0; i < child_num_files; i++) { + uint64_t new_offset = (uint64_t)child_attr_list[i].filename + + parent_filenames_len; + child_attr_list[i].filename = (char*)new_offset; + } + + /* Now we need to append the child file attrs to the parent file attrs, + * create a new hg_bulk and replace the old parent bulk with the new one. 
+ */ + memcpy(&parent_attr_list[parent_num_files], child_attr_list, + child_buf_size); + free(child_attr_list); + + size_t parent_buf_size = + (parent_num_files + child_num_files) * sizeof(unifyfs_file_attr_t); + hg_size_t segment_sizes[1] = { parent_buf_size }; + void* segment_ptrs[1] = { (void*)parent_attr_list }; + + // Save the parent's old bulk so that we can restore it if the + // bulk create fails, or free it if the create succeeds + hg_bulk_t parent_old_bulk = p_out->file_meta; + + hret = margo_bulk_create(unifyfsd_rpc_context->svr_mid, 1, + segment_ptrs, segment_sizes, + HG_BULK_READ_ONLY, &p_out->file_meta); + if (hret != HG_SUCCESS) { + LOGERR("margo_bulk_create() failed - %s", HG_Error_to_string(hret)); + p_out->file_meta = parent_old_bulk; + free(parent_attr_list); + return UNIFYFS_ERROR_MARGO; + } + + margo_bulk_free(parent_old_bulk); + + /* Lastly, update the num_files value */ + p_out->num_files += child_num_files; + + return ret; +} + static int get_child_response(coll_request* coll_req, hg_handle_t chdl) { @@ -84,6 +293,17 @@ static int get_child_response(coll_request* coll_req, void* output = coll_req->output; switch (coll_req->req_type) { + case UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP: { + bootstrap_complete_bcast_out_t* cbbo = + (bootstrap_complete_bcast_out_t*) out; + bootstrap_complete_bcast_out_t* bbo = + (bootstrap_complete_bcast_out_t*) output; + child_ret = cbbo->ret; + if ((NULL != bbo) && (child_ret != UNIFYFS_SUCCESS)) { + bbo->ret = child_ret; + } + break; + } case UNIFYFS_SERVER_BCAST_RPC_EXTENTS: { extent_bcast_out_t* cebo = (extent_bcast_out_t*) out; extent_bcast_out_t* ebo = (extent_bcast_out_t*) output; @@ -138,6 +358,33 @@ static int get_child_response(coll_request* coll_req, } break; } + case UNIFYFS_SERVER_BCAST_RPC_METAGET: { + // NOTE: This case is different. It's currently the only + // case that actually returns more than just a single error + // code up the tree. 
+ metaget_all_bcast_out_t* cmabo = + (metaget_all_bcast_out_t*) out; + metaget_all_bcast_out_t* mabo = + (metaget_all_bcast_out_t*) output; + child_ret = cmabo->ret; + if ((NULL != mabo) && (child_ret != UNIFYFS_SUCCESS)) { + mabo->ret = child_ret; + } + if ((NULL != cmabo) && (NULL != mabo)) { + merge_metaget_all_bcast_outputs( + mabo, cmabo, coll_req->progress_hdl, chdl); + } else { + /* One or both of the output structures is missing. + * (This shouldn't ever happen.) */ + LOGERR( + "Missing required output structs when handling " + "UNIFYFS_SERVER_BCAST_RPC_METAGET child responses!"); + LOGERR("Parent output struct: 0x%lX", (uint64_t)mabo); + LOGERR("Child output struct: 0x%lX", (uint64_t)cmabo); + mabo->ret = UNIFYFS_ERROR_BADCONFIG; + } + break; + } default: child_ret = UNIFYFS_FAILURE; LOGERR("invalid collective request type %d", @@ -255,16 +502,16 @@ static coll_request* collective_create(server_rpc_e req_type, * before calling bcast_progress_rpc(). */ - int rc = ABT_mutex_create(&coll_req->child_resp_valid_mut); + int rc = ABT_mutex_create(&coll_req->resp_valid_sync); if (ABT_SUCCESS != rc) { LOGERR("ABT_mutex_create failed"); free(coll_req); return NULL; } - rc = ABT_cond_create(&coll_req->child_resp_valid); + rc = ABT_cond_create(&coll_req->resp_valid_cond); if (ABT_SUCCESS != rc) { LOGERR("ABT_cond_create failed"); - ABT_mutex_free(&coll_req->child_resp_valid_mut); + ABT_mutex_free(&coll_req->resp_valid_sync); free(coll_req); return NULL; } @@ -273,8 +520,8 @@ static coll_request* collective_create(server_rpc_e req_type, UNIFYFS_BCAST_K_ARY, &(coll_req->tree)); if (rc) { LOGERR("unifyfs_tree_init() failed"); - ABT_mutex_free(&coll_req->child_resp_valid_mut); - ABT_cond_free(&coll_req->child_resp_valid); + ABT_mutex_free(&coll_req->resp_valid_sync); + ABT_cond_free(&coll_req->resp_valid_cond); free(coll_req); return NULL; } @@ -288,8 +535,8 @@ static coll_request* collective_create(server_rpc_e req_type, free(coll_req->child_hdls); 
free(coll_req->child_reqs); /* Note: calling free() on NULL is explicitly allowed */ - ABT_mutex_free(&coll_req->child_resp_valid_mut); - ABT_cond_free(&coll_req->child_resp_valid); + ABT_mutex_free(&coll_req->resp_valid_sync); + ABT_cond_free(&coll_req->resp_valid_cond); free(coll_req); return NULL; } @@ -367,8 +614,8 @@ void collective_cleanup(coll_request* coll_req) } /* Release the Argobots mutex and condition variable */ - ABT_cond_free(&coll_req->child_resp_valid); - ABT_mutex_free(&coll_req->child_resp_valid_mut); + ABT_cond_free(&coll_req->resp_valid_cond); + ABT_mutex_free(&coll_req->resp_valid_sync); /* free allocated memory */ if (NULL != coll_req->input) { @@ -410,6 +657,8 @@ static int collective_forward(coll_request* coll_req) /* invoke bcast request rpc on child */ margo_request* creq = coll_req->child_reqs + i; hg_handle_t* chdl = coll_req->child_hdls + i; + LOGDBG("BCAST_RPC: collective(%p) forwarding to child[%d]", + coll_req, i); int rc = forward_child_request(coll_req->input, *chdl, creq); if (rc != UNIFYFS_SUCCESS) { LOGERR("forward to child[%d] failed", i); @@ -430,6 +679,12 @@ void collective_set_local_retval(coll_request* coll_req, int val) } switch (coll_req->req_type) { + case UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP: { + bootstrap_complete_bcast_out_t* bbo = + (bootstrap_complete_bcast_out_t*) output; + bbo->ret = val; + break; + } case UNIFYFS_SERVER_BCAST_RPC_EXTENTS: { extent_bcast_out_t* ebo = (extent_bcast_out_t*) output; ebo->ret = val; @@ -460,6 +715,10 @@ void collective_set_local_retval(coll_request* coll_req, int val) ubo->ret = val; break; } + case UNIFYFS_SERVER_BCAST_RPC_METAGET: { + metaget_all_bcast_out_t* mabo = (metaget_all_bcast_out_t*) output; + mabo->ret = val; + } default: LOGERR("invalid collective request type %d", coll_req->req_type); break; @@ -485,7 +744,7 @@ int collective_finish(coll_request* coll_req) * then send the output back to the caller. 
If we're at the root * of the tree, though, there might be output data, but no place * to send it. */ - if ((NULL != coll_req->output) && (NULL != coll_req->resp_hdl)) { + if ((NULL != coll_req->output) && (HG_HANDLE_NULL != coll_req->resp_hdl)) { hg_return_t hret = margo_respond(coll_req->resp_hdl, coll_req->output); if (hret != HG_SUCCESS) { LOGERR("margo_respond() failed - %s", HG_Error_to_string(hret)); @@ -497,14 +756,14 @@ int collective_finish(coll_request* coll_req) /* Signal the condition variable in case there are other threads * waiting for the child responses */ - ABT_mutex_lock(coll_req->child_resp_valid_mut); - ABT_cond_signal(coll_req->child_resp_valid); + ABT_mutex_lock(coll_req->resp_valid_sync); + ABT_cond_signal(coll_req->resp_valid_cond); /* There should only be a single thread waiting on the CV, so we don't * need to use ABT_cond_broadcast() */ - ABT_mutex_unlock(coll_req->child_resp_valid_mut); + ABT_mutex_unlock(coll_req->resp_valid_sync); /* Locking the mutex before signaling is required in order to ensure * that the waiting thread has had a chance to actually call - * ABT_cond_wait() before this thread signals the CV. */ + * ABT_cond_timedwait() before this thread signals the CV. 
*/ return ret; } @@ -554,8 +813,9 @@ int invoke_bcast_progress_rpc(coll_request* coll_req) static void bcast_progress_rpc(hg_handle_t handle) { /* assume we'll succeed */ - int32_t ret = UNIFYFS_SUCCESS; + int ret = UNIFYFS_SUCCESS; coll_request* coll = NULL; + bool cleanup_collective = false; bcast_progress_in_t in; hg_return_t hret = margo_get_input(handle, &in); @@ -563,35 +823,174 @@ static void bcast_progress_rpc(hg_handle_t handle) LOGERR("margo_get_input() failed - %s", HG_Error_to_string(hret)); ret = UNIFYFS_ERROR_MARGO; } else { - /* call collective_finish() to progress bcast operation */ coll = (coll_request*) in.coll_req; + margo_free_input(handle, &in); + + cleanup_collective = ((NULL != coll) && (coll->auto_cleanup)); + /* We have to check the auto_cleanup variable now because in the case + * where auto_cleanup is false, another thread will be freeing the + * collective. And once the memory is freed, we can't read the + * auto_cleanup variable. + * + * There's a condition variable that's signaled by collective_finish(), + * and the memory won't be freed until some time after that happens, so + * it's safe to check the variable up here. 
*/ + + /* call collective_finish() to progress bcast operation */ LOGDBG("BCAST_RPC: bcast progress collective(%p)", coll); ret = collective_finish(coll); if (ret != UNIFYFS_SUCCESS) { - LOGERR("collective_finish() failed for coll_req(%p) (rc=%d)", + LOGERR("collective_finish() failed for collective(%p) (rc=%d)", coll, ret); } } /* finish rpc */ bcast_progress_out_t out; - out.ret = ret; + out.ret = (int32_t) ret; hret = margo_respond(handle, &out); if (hret != HG_SUCCESS) { LOGERR("margo_respond() failed - %s", HG_Error_to_string(hret)); } - if ((NULL != coll) && (coll->auto_cleanup)) { + if (cleanup_collective) { collective_cleanup(coll); } /* free margo resources */ - margo_free_input(handle, &in); margo_destroy(handle); } DEFINE_MARGO_RPC_HANDLER(bcast_progress_rpc) +/*************************************************** + * Broadcast server bootstrap completion + ***************************************************/ + +/* bootstrap complete broadcast rpc handler */ +static void bootstrap_complete_bcast_rpc(hg_handle_t handle) +{ + LOGDBG("BCAST_RPC: bootstrap handler"); + + /* assume we'll succeed */ + int ret = UNIFYFS_SUCCESS; + + coll_request* coll = NULL; + server_rpc_req_t* req = calloc(1, sizeof(*req)); + bootstrap_complete_bcast_in_t* in = calloc(1, sizeof(*in)); + bootstrap_complete_bcast_out_t* out = calloc(1, sizeof(*out)); + if ((NULL == req) || (NULL == in) || (NULL == out)) { + ret = ENOMEM; + } else { + /* get input params */ + hg_return_t hret = margo_get_input(handle, in); + if (hret != HG_SUCCESS) { + LOGERR("margo_get_input() failed - %s", HG_Error_to_string(hret)); + ret = UNIFYFS_ERROR_MARGO; + } else { + hg_id_t op_hgid = + unifyfsd_rpc_context->rpcs.bootstrap_complete_bcast_id; + server_rpc_e rpc = UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP; + coll = collective_create(rpc, handle, op_hgid, (int)(in->root), + (void*)in, (void*)out, sizeof(*out), + HG_BULK_NULL, HG_BULK_NULL, NULL); + if (NULL == coll) { + ret = ENOMEM; + } else { + ret = 
collective_forward(coll); + if (ret == UNIFYFS_SUCCESS) { + req->req_type = rpc; + req->coll = coll; + req->handle = handle; + req->input = (void*) in; + ret = sm_submit_service_request(req); + if (ret != UNIFYFS_SUCCESS) { + LOGERR("failed to submit coll request to svcmgr"); + } + } + } + } + } + + if (ret != UNIFYFS_SUCCESS) { + /* report failure back to caller */ + bootstrap_complete_bcast_out_t bbo; + bbo.ret = (int32_t)ret; + hg_return_t hret = margo_respond(handle, &bbo); + if (hret != HG_SUCCESS) { + LOGERR("margo_respond() failed - %s", HG_Error_to_string(hret)); + } + + if (NULL != coll) { + collective_cleanup(coll); + } else { + margo_destroy(handle); + } + } +} +DEFINE_MARGO_RPC_HANDLER(bootstrap_complete_bcast_rpc) + +/* Execute broadcast tree for 'bootstrap complete' notification */ +int unifyfs_invoke_broadcast_bootstrap_complete(void) +{ + /* assuming success */ + int ret = UNIFYFS_SUCCESS; + + LOGDBG("BCAST_RPC: starting bootstrap complete"); + coll_request* coll = NULL; + bootstrap_complete_bcast_in_t* in = calloc(1, sizeof(*in)); + if (NULL == in) { + ret = ENOMEM; + } else { + /* set input params */ + in->root = (int32_t) glb_pmi_rank; + hg_id_t op_hgid = + unifyfsd_rpc_context->rpcs.bootstrap_complete_bcast_id; + server_rpc_e rpc = UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP; + coll = collective_create(rpc, HG_HANDLE_NULL, op_hgid, + glb_pmi_rank, (void*)in, + NULL, sizeof(bootstrap_complete_bcast_out_t), + HG_BULK_NULL, HG_BULK_NULL, NULL); + if (NULL == coll) { + ret = ENOMEM; + } else { + ret = collective_forward(coll); + if (ret == UNIFYFS_SUCCESS) { + /* avoid cleanup by the progress rpc */ + coll->auto_cleanup = 0; + ABT_mutex_lock(coll->resp_valid_sync); + ret = invoke_bcast_progress_rpc(coll); + if (ret == UNIFYFS_SUCCESS) { + /* wait for all the child responses to come back */ + struct timespec timeout; + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_sec += 5; /* 5 sec */ + int rc = ABT_cond_timedwait(coll->resp_valid_cond, + 
coll->resp_valid_sync, + &timeout); + if (ABT_ERR_COND_TIMEDOUT == rc) { + LOGERR("timeout"); + ret = UNIFYFS_ERROR_TIMEOUT; + } else if (rc) { + LOGERR("failed to wait on condition (err=%d)", rc); + ret = UNIFYFS_ERROR_MARGO; + } else if (NULL != coll->output) { + bootstrap_complete_bcast_out_t* out = + (bootstrap_complete_bcast_out_t*) coll->output; + ret = out->ret; + } + } + ABT_mutex_unlock(coll->resp_valid_sync); + } else { + LOGERR("collective(%p) forward failed - cleaning up", coll); + } + collective_cleanup(coll); + } + } + + return ret; +} + /************************************************************************* * Broadcast file extents metadata *************************************************************************/ @@ -674,7 +1073,7 @@ static void extent_bcast_rpc(hg_handle_t handle) DEFINE_MARGO_RPC_HANDLER(extent_bcast_rpc) /* Execute broadcast tree for extent metadata */ -int unifyfs_invoke_broadcast_extents_rpc(int gfid) +int unifyfs_invoke_broadcast_extents(int gfid) { /* assuming success */ int ret = UNIFYFS_SUCCESS; @@ -1350,3 +1749,345 @@ int unifyfs_invoke_broadcast_unlink(int gfid) } return ret; } + +/************************************************************************* + * Broadcast metaget all request + *************************************************************************/ + +/* metaget all broacast rpc handler */ +static void metaget_all_bcast_rpc(hg_handle_t handle) +{ + LOGDBG("BCAST_RPC: metaget_all handler"); + + /* assume we'll succeed */ + int ret = UNIFYFS_SUCCESS; + + coll_request* coll = NULL; + server_rpc_req_t* req = calloc(1, sizeof(*req)); + metaget_all_bcast_in_t* in = calloc(1, sizeof(*in)); + metaget_all_bcast_out_t* out = calloc(1, sizeof(*out)); + if ((NULL == req) || (NULL == in) || (NULL == out)) { + ret = ENOMEM; + } else { + /* get input params */ + LOGDBG("BCAST_RPC: getting input params"); + hg_return_t hret = margo_get_input(handle, in); + if (hret != HG_SUCCESS) { + LOGERR("margo_get_input() failed - 
%s", HG_Error_to_string(hret)); + ret = UNIFYFS_ERROR_MARGO; + } else { + LOGDBG("BCAST_RPC: creating collective"); + hg_id_t op_hgid = unifyfsd_rpc_context->rpcs.metaget_all_bcast_id; + server_rpc_e rpc = UNIFYFS_SERVER_BCAST_RPC_METAGET; + coll = collective_create(rpc, handle, op_hgid, (int)(in->root), + (void*)in, (void*)out, sizeof(*out), + HG_BULK_NULL, HG_BULK_NULL, NULL); + if (NULL == coll) { + ret = ENOMEM; + } else { + LOGDBG("BCAST_RPC: forwarding collective"); + ret = collective_forward(coll); + if (ret == UNIFYFS_SUCCESS) { + req->req_type = rpc; + req->coll = coll; + req->handle = handle; + req->input = (void*) in; + req->bulk_buf = NULL; + req->bulk_sz = 0; + ret = sm_submit_service_request(req); + if (ret != UNIFYFS_SUCCESS) { + LOGERR("failed to submit coll request to svcmgr"); + } + } + } + } + } + + if (ret != UNIFYFS_SUCCESS) { + /* report failure back to caller */ + metaget_all_bcast_out_t mgabo; + mgabo.ret = (int32_t)ret; + hg_return_t hret = margo_respond(handle, &mgabo); + if (hret != HG_SUCCESS) { + LOGERR("margo_respond() failed - %s", HG_Error_to_string(hret)); + } + + if (NULL != coll) { + collective_cleanup(coll); + } else { + margo_destroy(handle); + } + } + + LOGDBG("BCAST_RPC: exiting metaget_all handler"); + +} + +DEFINE_MARGO_RPC_HANDLER(metaget_all_bcast_rpc) + +/* Execute broadcast tree for a metaget_all operation*/ +/* Upon success, file_attrs will hold data that has been allocated with + * malloc() and must be freed by the caller with free(). In the event of an + * error, the caller must *NOT* free the pointer. + * + * Note that 0 files is still considered a successful result. In that case, + * num_file_attrs will point to 0 and *file_attrs will point to NULL. 
*/ +int unifyfs_invoke_broadcast_metaget_all(unifyfs_file_attr_t** file_attrs, + int* num_file_attrs) +{ + int ret = UNIFYFS_SUCCESS; + hg_return_t bulk_create_hret = HG_NOMEM; + hg_return_t bulk_transfer_hret = HG_OTHER_ERROR; + /* Have to assume the bulk create and bulk transfer operations failed or + * else we might try to clean up non-existent data down in the clean-up + * section. */ + + hg_bulk_t local_bulk; + + LOGDBG("BCAST_RPC: starting metaget_all broadcast"); + + coll_request* coll = NULL; + unifyfs_file_attr_t* attr_list = NULL; + unifyfs_file_attr_t* local_file_attrs = NULL; + /* attr_list holds the metadata we received from other server processes + * via the broadcast RPC. local_file_attrs holds metadata for files + * that the current server process owns. */ + metaget_all_bcast_in_t* in = calloc(1, sizeof(*in)); + metaget_all_bcast_out_t* out = calloc(1, sizeof(*out)); + if ((NULL == in) || (NULL == out)) { + ret = ENOMEM; + goto Exit_Invoke_BMA; + } + + /* get input params */ + in->root = (int32_t) glb_pmi_rank; + + hg_id_t op_hgid = unifyfsd_rpc_context->rpcs.metaget_all_bcast_id; + server_rpc_e rpc = UNIFYFS_SERVER_BCAST_RPC_METAGET; + coll = collective_create(rpc, HG_HANDLE_NULL, op_hgid, + glb_pmi_rank, (void*)in, + (void*)out, sizeof(metaget_all_bcast_out_t), + HG_BULK_NULL, HG_BULK_NULL, NULL); + /* Note: We are passing in HG_HANDLE_NULL for the response handle + * because we are the root of the tree and there's nobody for us to + * respond to. 
*/ + + if (NULL == coll) { + ret = ENOMEM; + goto Exit_Invoke_BMA; + } + + ret = collective_forward(coll); + if (UNIFYFS_SUCCESS != ret) { + goto Exit_Invoke_BMA; + } + + /* We don't want the progress rpc to clean up for us because + * we need to get the output data */ + coll->auto_cleanup = 0; + + ABT_mutex_lock(coll->resp_valid_sync); + /* Have to lock the mutex before the bcast_progress_rpc call + * so that we're sure to be waiting on the condition + * variable before the progress thread gets to the point + * where it signals the CV. */ + + ret = invoke_bcast_progress_rpc(coll); + if (UNIFYFS_SUCCESS != ret) { + LOGERR("invoke_bcast_progress_rpc() failed with error %d", ret); + goto Exit_Invoke_BMA; + } + + /* While the broadcast RPC is running, let's fetch the metadata for + * all the files the server we're currently running as owns. */ + unsigned int num_local_files = 0; + ret = unifyfs_get_owned_files(&num_local_files, &local_file_attrs); + if (UNIFYFS_SUCCESS != ret) { + LOGERR("unifyfs_get_owned_files() failed with error %d", ret); + goto Exit_Invoke_BMA; + } + + // Wait for all the child responses to come back + struct timespec timeout; + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_sec += 10; /* 10 sec */ + int rc = ABT_cond_timedwait(coll->resp_valid_cond, + coll->resp_valid_sync, + &timeout); + if (ABT_ERR_COND_TIMEDOUT == rc) { + LOGERR("timeout"); + ret = UNIFYFS_ERROR_TIMEOUT; + } else if (rc) { + LOGERR("failed to wait on condition (err=%d)", rc); + ret = UNIFYFS_ERROR_MARGO; + } + ABT_mutex_unlock(coll->resp_valid_sync); + // Now we can get the data from the output struct + + if (sizeof(metaget_all_bcast_out_t) != coll->output_sz) { + LOGERR("Unexpected size (%zu) for collective output - expected %zu", + coll->output_sz, sizeof(metaget_all_bcast_out_t)); + } + + // Pull the bulk data (the list of file_attr structs) over + metaget_all_bcast_out_t* results = (metaget_all_bcast_out_t*)coll->output; + + /* Now check the number of files - if 
it's 0, then we don't need to + * bother with the bulk tranfser. (In fact, if we were to try, we'd + * get an error.) */ + + if (results->num_files) { + hg_size_t buf_size = results->num_files * sizeof(unifyfs_file_attr_t); + attr_list = calloc(results->num_files, sizeof(unifyfs_file_attr_t)); + if (NULL == attr_list) { + ret = ENOMEM; + goto Exit_Invoke_BMA; + } + + // Figure out some margo-specific info that we need for the transfer + const struct hg_info* info = margo_get_info(coll->progress_hdl); + hg_addr_t server_addr = info->addr; + // address of the bulk data on the server side + margo_instance_id mid = + margo_hg_handle_get_instance(coll->progress_hdl); + + bulk_create_hret = + margo_bulk_create(mid, 1, (void**)&attr_list, &buf_size, + HG_BULK_WRITE_ONLY, &local_bulk); + if (HG_SUCCESS != bulk_create_hret) { + LOGERR("margo_bulk_create() failed - %s", + HG_Error_to_string(bulk_create_hret)); + ret = UNIFYFS_ERROR_MARGO; + goto Exit_Invoke_BMA; + } + + bulk_transfer_hret = + margo_bulk_transfer(mid, HG_BULK_PULL, server_addr, + results->file_meta, 0, local_bulk, 0, buf_size); + if (HG_SUCCESS != bulk_transfer_hret) { + LOGERR("margo_bulk_transfer() failed - %s", + HG_Error_to_string(bulk_transfer_hret)); + ret = UNIFYFS_ERROR_MARGO; + goto Exit_Invoke_BMA; + } + + /* At this point, attr_list should have the file_attr_t structs from + * all the other servers. However, we still need to assign the + * filename values for each struct. */ + + for (unsigned int i = 0; i < results->num_files; i++) { + /* Remember that we abused the filename pointer to actually hold + * an offset into the filenames string that we sent separately. 
+ * (See the comments in process_metaget_bcast_rpc()) */ + uint64_t start_offset = (uint64_t)attr_list[i].filename; + uint64_t name_len; + if (i < (results->num_files-1)) { + name_len = ((uint64_t)attr_list[i+1].filename) - start_offset; + } else { + /* length is calculated differently for the last file in + * file_attrs */ + name_len = strlen(&results->filenames[start_offset]); + } + attr_list[i].filename = + strndup(&results->filenames[start_offset], name_len); + if (NULL == attr_list[i].filename) { + ret = ENOMEM; + LOGERR("strdup() failed processing filename"); + /* If we're actually getting ENOMEM from strdup(), the error + * log is probably also going to fail... */ + goto Exit_Invoke_BMA; + /* Technically, attr_list probably contains valid data and we + * could try to return a partial list. If we did, though, then + * we'd have ensure we checked for NULL before dereferencing + * the filename pointer *EVERYWHERE* else. Additionally, if + * we've really run out of memory, then other things are + * probably going to start failing pretty quickly. In short, + * trying to salvage this situation isn't worth the hassle. */ + } + } + } + + LOGINFO("Total number of files owned by the current server: %d", + num_local_files); + LOGINFO("Total number of files returned from children: %d", + results->num_files); + LOGINFO("Return code from children: %d", results->ret); + + /* Need to merge the metadata for the local files with the metadata + * returned by the RPC */ + if (num_local_files) { + size_t new_size = (results->num_files + num_local_files) * + sizeof(unifyfs_file_attr_t); + unifyfs_file_attr_t* merged_attr_list = realloc(attr_list, new_size); + if (merged_attr_list) { + attr_list = merged_attr_list; + } else { + LOGERR("Failed to realloc() file attr list!"); + ret = ENOMEM; + goto Exit_Invoke_BMA; + //TODO: Make sure we clean this up in the event of an error! 
+ } + memcpy(&attr_list[results->num_files], local_file_attrs, + num_local_files * sizeof(unifyfs_file_attr_t)); + } + + // Make the results visible to the caller + *file_attrs = attr_list; + *num_file_attrs = results->num_files + num_local_files; + +Exit_Invoke_BMA: + /* If we hit an error somewhere, then there's a bunch of clean-up + * that we need to do... */ + if (UNIFYFS_SUCCESS != ret) { + if (HG_SUCCESS == bulk_transfer_hret) { + /* We made it past the bulk transfer before failing, which means + * we probably called strdup() on a bunch of filename strings. + * Have to free all of those... */ + for (unsigned int i = 0; i < results->num_files; i++) { + free(attr_list[i].filename); + } + } + /* If there's been an error, we won't be returning the attr_list + * pointer to the caller, so make sure it's freed. */ + free(attr_list); + + if (coll) { + /* If we have a collective struct, then there's some more + * clean-up to do: + * + * Depending on where any errors occurred above, the mutex + * might have been left in a locked state. Argobots doesn't + * provide a way to test this, so we'll do a trylock() followed + * by an unlock to ensure it's unlocked. */ + ABT_mutex_trylock(coll->resp_valid_sync); + ABT_mutex_unlock(coll->resp_valid_sync); + } else { + /* If we never got as far as creating the collective, then just + * free the input and output structs. (These were all initialized + * to NULL, so it's safe to call free() on them even if we never + * actually got around to allocating them.) */ + free(in); + free(out); + } + } + + if (local_file_attrs) { + /* If we successfully allocated memory for local_file_attrs, + * then we need to free it. */ + free(local_file_attrs); + } + + if (HG_SUCCESS == bulk_create_hret) { + /* If we successfully created the bulk, then we need to free it + * (regardless of the overall success of the function). 
*/ + margo_bulk_free(local_bulk); + } + + if (coll) { + /* If we successfully created the collective, then we need to + * clean it up. */ + collective_cleanup(coll); + } + + return ret; +} diff --git a/server/src/unifyfs_group_rpc.h b/server/src/unifyfs_group_rpc.h index 70fba7b18..fb463694e 100644 --- a/server/src/unifyfs_group_rpc.h +++ b/server/src/unifyfs_group_rpc.h @@ -31,29 +31,26 @@ typedef struct coll_request { unifyfs_tree_t tree; hg_handle_t progress_hdl; hg_handle_t resp_hdl; - size_t output_sz; /* size of output struct */ - void* output; /* output struct (type is dependent on rpc) */ + size_t output_sz; /* size of output struct */ + void* output; /* output struct (type depends on rpc) */ void* input; - void* bulk_buf; /* allocated buffer for bulk data */ + void* bulk_buf; /* allocated buffer for bulk data */ hg_bulk_t bulk_in; hg_bulk_t bulk_forward; margo_request progress_req; margo_request* child_reqs; hg_handle_t* child_hdls; - int auto_cleanup; /* if true, bcast_progress_rpc() will - * call collective_cleanup() on this - * struct. This is the default behavior. */ - ABT_cond child_resp_valid; /* bcast_progress_rpc() will signal this - * condition variable when all the child - * responses have been processed. - * Intended to provide a mechanism for the - * server that originated a bcast RPC to - * wait for all the results to come - * back. */ - ABT_mutex child_resp_valid_mut; - /* The mutex associated with the above condition variable. */ - + int auto_cleanup; /* If set (non-zero), bcast_progress_rpc() + * will call collective_cleanup(). This is + * the default behavior. */ + ABT_cond resp_valid_cond; /* bcast_progress_rpc() will signal this + * condition variable when all the child + * responses have been processed. + * Provides a mechanism for the root + * server for a bcast RPC to wait for all + * the results to come back. 
*/ + ABT_mutex resp_valid_sync; /* mutex for above condition variable */ } coll_request; /* set collective output return value to local result value */ @@ -75,18 +72,22 @@ void collective_cleanup(coll_request* coll_req); */ int invoke_bcast_progress_rpc(coll_request* coll_req); + +/** + * @brief Broadcast that all servers have completed bootstrapping + * + * @return success|failure + */ +int unifyfs_invoke_broadcast_bootstrap_complete(void); + /** * @brief Broadcast file extents metadata to all servers * * @param gfid target file - * @param len length of file extents array - * @param extents array of extents to broadcast * * @return success|failure */ -int unifyfs_invoke_broadcast_extents(int gfid, - unsigned int len, - struct extent_tree_node* extents); +int unifyfs_invoke_broadcast_extents(int gfid); /** * @brief Broadcast file attributes metadata to all servers @@ -144,5 +145,18 @@ int unifyfs_invoke_broadcast_truncate(int gfid, */ int unifyfs_invoke_broadcast_unlink(int gfid); +/** + * @brief Fetch metadata for all files on all servers + * + * @param file_attrs list of file metadata + * @param num_file_attrs number of files in the list + * + * @return success|failure + */ +/* Upon success, file_attrs will hold data that has been allocated with + * malloc() and must be freed by the caller with free(). In the event of an + * error, the caller must *NOT* free the pointer. 
*/ +int unifyfs_invoke_broadcast_metaget_all(unifyfs_file_attr_t** file_attrs, + int* num_file_attrs); #endif // UNIFYFS_GROUP_RPC_H diff --git a/server/src/unifyfs_inode.c b/server/src/unifyfs_inode.c index a8e8bb107..74b3e157d 100644 --- a/server/src/unifyfs_inode.c +++ b/server/src/unifyfs_inode.c @@ -21,6 +21,7 @@ #include "unifyfs_inode.h" #include "unifyfs_inode_tree.h" #include "unifyfs_request_manager.h" +#include "unifyfs_p2p_rpc.h" // for hash_gfid_to_server() struct unifyfs_inode_tree _global_inode_tree; struct unifyfs_inode_tree* global_inode_tree = &_global_inode_tree; @@ -39,11 +40,14 @@ struct unifyfs_inode* unifyfs_inode_alloc(int gfid, unifyfs_file_attr_t* attr) extent_tree_init(tree); ino->extents = tree; ino->gfid = gfid; - ino->attr = *attr; - ino->attr.filename = strdup(attr->filename); - pthread_rwlock_init(&(ino->rwlock), NULL); - ABT_mutex_create(&(ino->abt_sync)); + ino->pending_extents = NULL; + + unifyfs_file_attr_set_invalid(&(ino->attr)); + unifyfs_file_attr_update(UNIFYFS_FILE_ATTR_OP_CREATE, + &(ino->attr), attr); + + ABT_rwlock_create(&(ino->rwlock)); } else { LOGERR("failed to allocate memory for inode"); } @@ -61,7 +65,7 @@ struct unifyfs_inode* unifyfs_inode_alloc(int gfid, unifyfs_file_attr_t* attr) static inline int unifyfs_inode_rdlock(struct unifyfs_inode* ino) { - return pthread_rwlock_rdlock(&ino->rwlock); + return ABT_rwlock_rdlock(ino->rwlock); } /** @@ -74,7 +78,7 @@ int unifyfs_inode_rdlock(struct unifyfs_inode* ino) static inline int unifyfs_inode_wrlock(struct unifyfs_inode* ino) { - return pthread_rwlock_wrlock(&ino->rwlock); + return ABT_rwlock_wrlock(ino->rwlock); } /** @@ -85,9 +89,28 @@ int unifyfs_inode_wrlock(struct unifyfs_inode* ino) static inline void unifyfs_inode_unlock(struct unifyfs_inode* ino) { - pthread_rwlock_unlock(&ino->rwlock); + ABT_rwlock_unlock(ino->rwlock); +} + + +/** + * @brief get the inode for a gfid. 
+ * + * @param gfid gfid of inode to retrieve + */ +static inline +struct unifyfs_inode* unifyfs_inode_lookup(int gfid) +{ + struct unifyfs_inode* ino = NULL; + unifyfs_inode_tree_rdlock(global_inode_tree); + { + ino = unifyfs_inode_tree_search(global_inode_tree, gfid); + } + unifyfs_inode_tree_unlock(global_inode_tree); + return ino; } + int unifyfs_inode_create(int gfid, unifyfs_file_attr_t* attr) { if (NULL == attr) { @@ -113,13 +136,6 @@ int unifyfs_inode_create(int gfid, unifyfs_file_attr_t* attr) return ret; } -static int int_cmp_fn(const void* a, const void* b) -{ - int ai = *(int*)a; - int bi = *(int*)b; - return ai - bi; -} - int unifyfs_inode_destroy(struct unifyfs_inode* ino) { int ret = UNIFYFS_SUCCESS; @@ -187,7 +203,7 @@ int unifyfs_inode_destroy(struct unifyfs_inode* ino) free(ino->extents); if (NULL != local_clients) { - qsort(local_clients, n_clients, sizeof(int), int_cmp_fn); + qsort(local_clients, n_clients, sizeof(int), int_compare_fn); last_client = -1; for (size_t i = 0; i < n_clients; i++) { int cb_client_id = local_clients[i]; @@ -216,8 +232,7 @@ int unifyfs_inode_destroy(struct unifyfs_inode* ino) } } - pthread_rwlock_destroy(&(ino->rwlock)); - ABT_mutex_free(&(ino->abt_sync)); + ABT_rwlock_free(&(ino->rwlock)); free(ino); } else { @@ -235,21 +250,16 @@ int unifyfs_inode_update_attr(int gfid, int attr_op, } int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (NULL == ino) { - ret = ENOENT; - } else { - unifyfs_inode_wrlock(ino); + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_wrlock(ino); + { unifyfs_file_attr_update(attr_op, &ino->attr, attr); - unifyfs_inode_unlock(ino); } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); - return ret; } @@ -257,36 +267,27 @@ int unifyfs_inode_metaset(int gfid, int attr_op, 
unifyfs_file_attr_t* attr) { int ret; - if (attr_op == UNIFYFS_FILE_ATTR_OP_CREATE) { ret = unifyfs_inode_create(gfid, attr); } else { ret = unifyfs_inode_update_attr(gfid, attr_op, attr); } - return ret; } int unifyfs_inode_metaget(int gfid, unifyfs_file_attr_t* attr) { - int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - if ((NULL == global_inode_tree) || (NULL == attr)) { return EINVAL; } - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (NULL != ino) { - *attr = ino->attr; - } else { - ret = ENOENT; - } + int ret = UNIFYFS_SUCCESS; + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL != ino) { + *attr = ino->attr; + } else { + ret = ENOENT; } - unifyfs_inode_tree_unlock(global_inode_tree); - return ret; } @@ -311,96 +312,175 @@ int unifyfs_inode_unlink(int gfid) int unifyfs_inode_truncate(int gfid, unsigned long size) { int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_wrlock(ino); - { - if (ino->attr.is_laminated) { - LOGERR("cannot truncate a laminated file (gfid=%d)", gfid); - ret = EINVAL; - } else { - ino->attr.size = size; - if (NULL != ino->extents) { - ret = extent_tree_truncate(ino->extents, size); - } + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_wrlock(ino); + { + if (ino->attr.is_laminated) { + LOGERR("cannot truncate a laminated file (gfid=%d)", gfid); + ret = EINVAL; + } else { + ino->attr.size = size; + if (NULL != ino->extents) { + ret = extent_tree_truncate(ino->extents, size); } } - unifyfs_inode_unlock(ino); } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); return ret; } -int unifyfs_inode_add_extents(int gfid, - int num_extents, - extent_metadata* extents) +int 
unifyfs_inode_add_pending_extents(int gfid, + client_rpc_req_t* client_req, + int num_extents, + extent_metadata* extents) { int ret = UNIFYFS_SUCCESS; - int i = 0; - struct unifyfs_inode* ino = NULL; - struct extent_tree* tree = NULL; - unifyfs_inode_tree_rdlock(global_inode_tree); + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + return ENOENT; + } + + if (ino->attr.is_laminated) { + LOGERR("trying to add extents to a laminated file (gfid=%d)", + gfid); + return EINVAL; + } + + unifyfs_inode_wrlock(ino); { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - goto out_unlock_tree; + if (NULL == ino->pending_extents) { + ino->pending_extents = arraylist_create(0); + if (NULL == ino->pending_extents) { + ret = ENOMEM; + LOGERR("failed to allocate inode pending extents list"); + goto add_pending_unlock_inode; + } } - if (ino->attr.is_laminated) { - LOGERR("trying to add extents to a laminated file (gfid=%d)", - gfid); - ret = EINVAL; - goto out_unlock_tree; + pending_extents_item* list_item = malloc(sizeof(*list_item)); + if (NULL == list_item) { + ret = ENOMEM; + LOGERR("failed to allocate inode pending extents list"); + goto add_pending_unlock_inode; + } + list_item->client_req = client_req; + list_item->num_extents = num_extents; + list_item->extents = extents; - ABT_mutex_lock(ino->abt_sync); + arraylist_add(ino->pending_extents, list_item); + } +add_pending_unlock_inode: + unifyfs_inode_unlock(ino); - unifyfs_inode_wrlock(ino); - { - tree = ino->extents; - if (NULL == tree) { - LOGERR("inode extent tree is missing"); - goto out_unlock_inode; - } + LOGINFO("added %d pending extents to inode (gfid=%d)", + num_extents, gfid); - for (i = 0; i < num_extents; i++) { - extent_metadata* current = extents + i; - ret = extent_tree_add(tree, current); - if (ret) { - LOGERR("failed to add extent [%lu, %lu] to gfid=%d", - current->start, current->end, gfid); - goto out_unlock_inode; - } - } - /* if the 
extent tree max offset is greater than the size we - * we currently have in the inode attributes, then update the - * inode size */ - unsigned long extent_sz = extent_tree_max_offset(ino->extents) + 1; - if ((uint64_t)extent_sz > ino->attr.size) { - ino->attr.size = extent_sz; - } + return ret; +} + +bool unifyfs_inode_has_pending_extents(int gfid) +{ + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + return false; + } + + bool has_pending = false; + unifyfs_inode_rdlock(ino); + { + if (NULL != ino->pending_extents) { + has_pending = true; } -out_unlock_inode: - unifyfs_inode_unlock(ino); + } + unifyfs_inode_unlock(ino); - LOGINFO("added %d extents to inode (gfid=%d, filesize=%" PRIu64 ")", - num_extents, gfid, ino->attr.size); + return has_pending; +} - ABT_mutex_unlock(ino->abt_sync); +int unifyfs_inode_get_pending_extents(int gfid, + arraylist_t** pending_list) +{ + if (NULL == pending_list) { + return EINVAL; } -out_unlock_tree: - unifyfs_inode_tree_unlock(global_inode_tree); + *pending_list = NULL; + + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + return ENOENT; + } + + unifyfs_inode_wrlock(ino); + { + if (NULL != ino->pending_extents) { + *pending_list = ino->pending_extents; + ino->pending_extents = NULL; + LOGINFO("returning pending list (sz=%d) from inode (gfid=%d)", + arraylist_size(*pending_list), gfid); + } + } + unifyfs_inode_unlock(ino); + + return UNIFYFS_SUCCESS; +} + +int unifyfs_inode_add_extents(int gfid, + int num_extents, + extent_metadata* extents) +{ + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + return ENOENT; + } + + if (ino->attr.is_laminated) { + LOGERR("trying to add extents to a laminated file (gfid=%d)", + gfid); + return EINVAL; + } + + int ret = UNIFYFS_SUCCESS; + unifyfs_inode_wrlock(ino); + { + struct extent_tree* tree = ino->extents; + if (NULL == tree) { + LOGERR("inode extent tree is missing"); + goto add_unlock_inode; + } + + for 
(int i = 0; i < num_extents; i++) { + extent_metadata* current = extents + i; + ret = extent_tree_add(tree, current); + if (ret) { + LOGERR("failed to add extent [%lu, %lu] to gfid=%d", + current->start, current->end, gfid); + goto add_unlock_inode; + } + } + + /* if the extent tree max offset is greater than the size we + * we currently have in the inode attributes, then update the + * inode size */ + unsigned long extent_sz = extent_tree_max_offset(ino->extents) + 1; + if ((uint64_t)extent_sz > ino->attr.size) { + ino->attr.size = extent_sz; + } + } +add_unlock_inode: + unifyfs_inode_unlock(ino); + + LOGINFO("added %d extents to inode (gfid=%d, filesize=%" PRIu64 ")", + num_extents, gfid, ino->attr.size); + return ret; } @@ -409,27 +489,22 @@ int unifyfs_inode_get_filesize(int gfid, size_t* outsize) { int ret = UNIFYFS_SUCCESS; size_t filesize = 0; - struct unifyfs_inode* ino = NULL; - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_rdlock(ino); - { - /* the size is updated each time we add extents or truncate, - * so no need to recalculate */ - filesize = ino->attr.size; - } - unifyfs_inode_unlock(ino); - - *outsize = filesize; - LOGDBG("local file size (gfid=%d): %lu", gfid, filesize); + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_rdlock(ino); + { + /* the size is updated each time we add extents or truncate, + * so no need to recalculate */ + filesize = ino->attr.size; } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); + + *outsize = filesize; + LOGDBG("local file size (gfid=%d): %lu", gfid, filesize); return ret; } @@ -437,23 +512,17 @@ int unifyfs_inode_get_filesize(int gfid, size_t* outsize) int unifyfs_inode_laminate(int gfid) { int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - - 
unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_wrlock(ino); + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_wrlock(ino); + { ino->attr.is_laminated = 1; - unifyfs_inode_unlock(ino); - - LOGDBG("file laminated (gfid=%d)", gfid); } + unifyfs_inode_unlock(ino); + LOGDBG("laminated file (gfid=%d)", gfid); } - unifyfs_inode_tree_unlock(global_inode_tree); - return ret; } @@ -466,39 +535,31 @@ int unifyfs_inode_get_extents(int gfid, } int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (NULL == ino) { - ret = ENOENT; - } else { - unifyfs_inode_rdlock(ino); - { - struct extent_tree* tree = ino->extents; - size_t n_extents = tree->count; - extent_metadata* _extents = calloc(n_extents, - sizeof(*_extents)); - if (NULL == _extents) { - ret = ENOMEM; - } else { - int i = 0; - struct extent_tree_node* curr = NULL; - while ((curr = extent_tree_iter(tree, curr)) != NULL) { - _extents[i] = curr->extent; - i++; - } - - *n = n_extents; - *extents = _extents; + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_rdlock(ino); + { + struct extent_tree* tree = ino->extents; + size_t n_extents = tree->count; + extent_metadata* _extents = calloc(n_extents, sizeof(*_extents)); + if (NULL == _extents) { + ret = ENOMEM; + } else { + int i = 0; + struct extent_tree_node* curr = NULL; + while ((curr = extent_tree_iter(tree, curr)) != NULL) { + _extents[i] = curr->extent; + i++; } + + *n = n_extents; + *extents = _extents; } - unifyfs_inode_unlock(ino); } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); - return ret; } @@ -508,36 +569,31 @@ int 
unifyfs_inode_get_extent_chunks(unifyfs_extent_t* extent, int* full_coverage) { int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; int gfid = extent->gfid; int covered = 0; *full_coverage = 0; - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_rdlock(ino); - { - if (NULL != ino->extents) { - unsigned long offset = extent->offset; - unsigned long len = extent->length; - ret = extent_tree_get_chunk_list(ino->extents, offset, len, - n_chunks, chunks, - &covered); - if (ret) { - LOGERR("failed to get chunks for gfid:%d, ret=%d", - gfid, ret); - } + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_rdlock(ino); + { + if (NULL != ino->extents) { + unsigned long offset = extent->offset; + unsigned long len = extent->length; + ret = extent_tree_get_chunk_list(ino->extents, offset, len, + n_chunks, chunks, + &covered); + if (ret) { + LOGERR("failed to get chunks for gfid=%d (rc=%d)", + gfid, ret); } } - unifyfs_inode_unlock(ino); } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); if (ret == UNIFYFS_SUCCESS) { /* extent_tree_get_chunk_list() does not populate the gfid field */ @@ -680,65 +736,23 @@ int unifyfs_inode_resolve_extent_chunks(unsigned int n_extents, return ret; } -int unifyfs_inode_span_extents( - int gfid, /* global file id we're looking in */ - unsigned long start, /* starting logical offset */ - unsigned long end, /* ending logical offset */ - int max, /* maximum number of key/vals to return */ - void* keys, /* array of length max for output keys */ - void* vals, /* array of length max for output values */ - int* outnum) /* number of entries returned */ -{ - int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - 
if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_rdlock(ino); - { - ret = extent_tree_span(ino->extents, gfid, start, end, - max, keys, vals, outnum); - if (ret) { - LOGERR("extent_tree_span failed (gfid=%d, ret=%d)", - gfid, ret); - } - } - unifyfs_inode_unlock(ino); - } - - } - unifyfs_inode_tree_unlock(global_inode_tree); - - return ret; -} - int unifyfs_inode_dump(int gfid) { int ret = UNIFYFS_SUCCESS; - struct unifyfs_inode* ino = NULL; - - unifyfs_inode_tree_rdlock(global_inode_tree); - { - ino = unifyfs_inode_tree_search(global_inode_tree, gfid); - if (!ino) { - ret = ENOENT; - } else { - unifyfs_inode_rdlock(ino); - { - LOGDBG("== inode (gfid=%d) ==\n", ino->gfid); - if (NULL != ino->extents) { - LOGDBG("extents:"); - extent_tree_dump(ino->extents); - } + struct unifyfs_inode* ino = unifyfs_inode_lookup(gfid); + if (NULL == ino) { + ret = ENOENT; + } else { + unifyfs_inode_rdlock(ino); + { + LOGDBG("== inode (gfid=%d) ==\n", ino->gfid); + if (NULL != ino->extents) { + LOGDBG("extents:"); + extent_tree_dump(ino->extents); } - unifyfs_inode_unlock(ino); } + unifyfs_inode_unlock(ino); } - unifyfs_inode_tree_unlock(global_inode_tree); return ret; } @@ -749,9 +763,9 @@ int unifyfs_get_gfids(int* num_gfids, int** gfid_list) int ret = UNIFYFS_SUCCESS; int _num_gfids = 0; - int gfid_list_size = 64; // TODO: Is there a better starting number??? - int* _gfid_list = malloc(sizeof(int) * gfid_list_size); - if (!_gfid_list) { + int gfid_list_size = 64; + int* _gfid_list = calloc(gfid_list_size, sizeof(int)); + if (NULL == _gfid_list) { return ENOMEM; } @@ -779,3 +793,87 @@ int unifyfs_get_gfids(int* num_gfids, int** gfid_list) *gfid_list = _gfid_list; return ret; } + + +int unifyfs_get_owned_files(unsigned int* num_files, + unifyfs_file_attr_t** attr_list) +{ + /* Iterate through the global_inode_tree and copy all the file + * attr structs for the files this server owns. 
+ * Note: the file names in the unifyfs_file_attr_t are pointers to + * separately allocated memory, so they will be created using strdup(). */ + + unsigned int attr_list_size = 64; + unsigned int num_files_int = 0; + unifyfs_file_attr_t* attr_list_int; + /* _int suffix is short for "internal". If everything succeeds, then + * before returning, we'll copy num_files_int to num_files and + * attr_list_int to attr_list. */ + + attr_list_int = malloc(sizeof(unifyfs_file_attr_t) * attr_list_size); + if (!attr_list_int) { + return ENOMEM; + } + + unifyfs_inode_tree_rdlock(global_inode_tree); + { + struct unifyfs_inode* node = + unifyfs_inode_tree_iter(global_inode_tree, NULL); + while (node) { + if (num_files_int == attr_list_size) { + attr_list_size *= 2; // Double the list size each time + attr_list_int = + realloc(attr_list_int, + sizeof(unifyfs_file_attr_t)*attr_list_size); + if (!attr_list_int) { + unifyfs_inode_tree_unlock(global_inode_tree); + free(attr_list_int); + return ENOMEM; + } + } + + /* We only want to copy file attrs that we're the owner of */ + int owner_rank = hash_gfid_to_server(node->attr.gfid); + if (owner_rank == glb_pmi_rank) { + memcpy(&attr_list_int[num_files_int], &node->attr, + sizeof(unifyfs_file_attr_t)); + + /* filename is a pointer to separately allocated memory. + * We need to do a deep copy, so create a new string with + * strdup(). */ + attr_list_int[num_files_int].filename = + strdup(node->attr.filename); + + num_files_int++; + } + node = unifyfs_inode_tree_iter(global_inode_tree, node); + } + } + unifyfs_inode_tree_unlock(global_inode_tree); + + /* realloc() the array list space down to only what we need. + * + * Note that corner cases get a little odd here: + * 1) If num_files_int is 0, this will conveniently free() the list for + * us. The pointer we get back from realloc() will be NULL and that's + * exactly what we should return to the caller. 
+ * 2) If the realloc actually fails (and it's unclear how that could + * happen given that we're reducing the size of the allocation), then + * the original memory will be left untouched and we we can return the + * original pointer. That wastes some space, but the pointer is + * valid and the memory will eventually be freed by the caller. */ + unifyfs_file_attr_t* attr_list_int2 = realloc( + attr_list_int, num_files_int * sizeof(unifyfs_file_attr_t)); + if (NULL == attr_list_int2) { + if (0 != num_files_int) { + /* realloc() actually failed. Wow. */ + attr_list_int2 = attr_list_int; + } + } + + /* If we made it here, then we walked the tree with no errors, so update + * the return parameters and return. */ + *attr_list = attr_list_int2; + *num_files = num_files_int; + return UNIFYFS_SUCCESS; +} diff --git a/server/src/unifyfs_inode.h b/server/src/unifyfs_inode.h index afa768e09..ac1dcb33b 100644 --- a/server/src/unifyfs_inode.h +++ b/server/src/unifyfs_inode.h @@ -18,15 +18,11 @@ #include "unifyfs_global.h" #include "extent_tree.h" -/** - * @brief file extent descriptor - */ -struct unifyfs_inode_extent { - int gfid; - unsigned long offset; - unsigned long length; -}; -typedef struct unifyfs_inode_extent unifyfs_inode_extent_t; +typedef struct pending_extents_item { + client_rpc_req_t* client_req; /* req details, including response handle */ + unsigned int num_extents; /* number of extents in array */ + extent_metadata* extents; /* array of extent metadata */ +} pending_extents_item; /** * @brief file and directory inode structure. 
this holds: @@ -38,9 +34,9 @@ struct unifyfs_inode { int gfid; /* global file identifier */ unifyfs_file_attr_t attr; /* file attributes */ struct extent_tree* extents; /* extent information */ + arraylist_t* pending_extents; /* list of pending_extents_item */ - pthread_rwlock_t rwlock; /* rwlock for pthread access */ - ABT_mutex abt_sync; /* mutex for argobots ULT access */ + ABT_rwlock rwlock; /* reader-writer lock */ }; /** @@ -134,13 +130,50 @@ int unifyfs_inode_get_extents(int gfid, extent_metadata** extents); /** - * @brief add new extents to the inode + * @brief add extents pending sync to the inode * * @param gfid the global file identifier - * @param num_extents the number of new extents in @nodes - * @param extents an array of extents to be added + * @param client_req the client req to sync the extents + * @param num_extents the number of extents in @extents + * @param extents an array of extents to be added as pending + * + * @return 0 on success, errno otherwise + */ +int unifyfs_inode_add_pending_extents(int gfid, + client_rpc_req_t* client_req, + int num_extents, + extent_metadata* extents); + +/** + * @brief check if inode has pending extents to sync + * + * @param gfid the global file identifier + * + * @return true if pending extents exist, false otherwise + */ +bool unifyfs_inode_has_pending_extents(int gfid); + +/** + * @brief retrieve pending extents list for the inode + * (future adds will go to a new pending list) + * + * @param gfid the global file identifier + * @param[out] pending_list the list of pending extents (if any) + * + * @return 0 on success, errno otherwise + */ + +int unifyfs_inode_get_pending_extents(int gfid, + arraylist_t** pending_list); + +/** + * @brief add new extents to the inode * - * @return + * @param gfid the global file identifier + * @param num_extents the number of extents in @extents + * @param extents an array of extents to be added + * + * @return 0 on success, errno otherwise */ int 
unifyfs_inode_add_extents(int gfid, int num_extents, @@ -200,33 +233,6 @@ int unifyfs_inode_resolve_extent_chunks(unsigned int n_extents, chunk_read_req_t** chunklocs, int* full_coverage); -/** - * @brief calls extents_tree_span, which will do: - * - * given an extent tree and starting and ending logical offsets, fill in - * key/value entries that overlap that range, returns at most max entries - * starting from lowest starting offset, sets outnum with actual number of - * entries returned - * - * @param gfid global file id - * @param start starting logical offset - * @param end ending logical offset - * @param max maximum number of key/vals to return - * - * @param[out] keys array of length max for output keys - * @param[out] vals array of length max for output values - * @param[out] outnum output number of entries returned - * - * @return - */ -int unifyfs_inode_span_extents(int gfid, - unsigned long start, - unsigned long end, - int max, - void* keys, - void* vals, - int* outnum); - /** * @brief prints the inode information to the log stream * @@ -244,5 +250,19 @@ int unifyfs_inode_dump(int gfid); */ int unifyfs_get_gfids(int* num_gfids, int** gfid_list); + +/** + * @brief Walk the tree and return a list file_attr_t structs for all files + * that we own. + * + * Upon success, the caller will be responsible for freeing attr_list. If + * this function returns an error code, then the caller must *NOT* free + * attr_list. + * + * @return 0 on success, errno otherwise + */ +int unifyfs_get_owned_files(unsigned int* num_files, + unifyfs_file_attr_t** attr_list); + #endif /* __UNIFYFS_INODE_H */ diff --git a/server/src/unifyfs_inode_tree.c b/server/src/unifyfs_inode_tree.c index 039174e89..3dee0881f 100644 --- a/server/src/unifyfs_inode_tree.c +++ b/server/src/unifyfs_inode_tree.c @@ -26,9 +26,8 @@ #define MIN(a, b) (a < b ? a : b) #define MAX(a, b) (a > b ? 
a : b) -static int unifyfs_inode_tree_compare_func( - struct unifyfs_inode* node1, - struct unifyfs_inode* node2) +static int uit_compare_func(struct unifyfs_inode* node1, + struct unifyfs_inode* node2) { if (node1->gfid > node2->gfid) { return 1; @@ -39,35 +38,29 @@ static int unifyfs_inode_tree_compare_func( } } -RB_PROTOTYPE( - rb_inode_tree, unifyfs_inode, - inode_tree_entry, unifyfs_inode_tree_compare_func) -RB_GENERATE( - rb_inode_tree, unifyfs_inode, - inode_tree_entry, unifyfs_inode_tree_compare_func) +RB_PROTOTYPE(rb_inode_tree, unifyfs_inode, inode_tree_entry, uit_compare_func) +RB_GENERATE(rb_inode_tree, unifyfs_inode, inode_tree_entry, uit_compare_func) /* Returns 0 on success, positive non-zero error code otherwise */ -int unifyfs_inode_tree_init( - struct unifyfs_inode_tree* tree) +int unifyfs_inode_tree_init(struct unifyfs_inode_tree* tree) { if (NULL == tree) { return EINVAL; } memset(tree, 0, sizeof(*tree)); - pthread_rwlock_init(&tree->rwlock, NULL); + ABT_rwlock_create(&(tree->rwlock)); RB_INIT(&tree->head); return UNIFYFS_SUCCESS; } /* Remove and free all nodes in the unifyfs_inode_tree. */ -void unifyfs_inode_tree_destroy( - struct unifyfs_inode_tree* tree) +void unifyfs_inode_tree_destroy(struct unifyfs_inode_tree* tree) { if (NULL != tree) { unifyfs_inode_tree_clear(tree); - pthread_rwlock_destroy(&tree->rwlock); + ABT_rwlock_free(&(tree->rwlock)); } } diff --git a/server/src/unifyfs_inode_tree.h b/server/src/unifyfs_inode_tree.h index 8bec5d8d2..7d1b0461f 100644 --- a/server/src/unifyfs_inode_tree.h +++ b/server/src/unifyfs_inode_tree.h @@ -12,10 +12,9 @@ * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. 
*/ -#ifndef __UNIFYFS_INODE_TREE_H -#define __UNIFYFS_INODE_TREE_H +#ifndef UNIFYFS_INODE_TREE_H +#define UNIFYFS_INODE_TREE_H -#include #include "tree.h" #include "extent_tree.h" #include "unifyfs_meta.h" @@ -31,7 +30,7 @@ */ struct unifyfs_inode_tree { RB_HEAD(rb_inode_tree, unifyfs_inode) head; /** inode RB tree */ - pthread_rwlock_t rwlock; /** lock for accessing tree */ + ABT_rwlock rwlock; /** lock for accessing tree */ }; /** @@ -80,14 +79,14 @@ int unifyfs_inode_tree_insert(struct unifyfs_inode_tree* tree, * @return 0 on success, errno otherwise */ int unifyfs_inode_tree_remove(struct unifyfs_inode_tree* tree, - int gfid, struct unifyfs_inode** removed); + int gfid, + struct unifyfs_inode** removed); /* Search for and return extents for given gfid on specified tree. * If not found, return NULL, assumes caller has lock on tree */ struct unifyfs_inode* unifyfs_inode_tree_search( struct unifyfs_inode_tree* tree, /* tree to search */ - int gfid /* global file id to find */ -); + int gfid); /* global file id to find */ /** * @brief Iterate the inode tree. 
@@ -143,9 +142,10 @@ struct unifyfs_inode* unifyfs_inode_tree_iter(struct unifyfs_inode_tree* tree, * * @return 0 on success, errno otherwise */ -static inline int unifyfs_inode_tree_rdlock(struct unifyfs_inode_tree* tree) +static inline +int unifyfs_inode_tree_rdlock(struct unifyfs_inode_tree* tree) { - return pthread_rwlock_rdlock(&tree->rwlock); + return ABT_rwlock_rdlock(tree->rwlock); } /** @@ -157,9 +157,10 @@ static inline int unifyfs_inode_tree_rdlock(struct unifyfs_inode_tree* tree) * * @return 0 on success, errno otherwise */ -static inline int unifyfs_inode_tree_wrlock(struct unifyfs_inode_tree* tree) +static inline +int unifyfs_inode_tree_wrlock(struct unifyfs_inode_tree* tree) { - return pthread_rwlock_wrlock(&tree->rwlock); + return ABT_rwlock_wrlock(tree->rwlock); } /** @@ -169,10 +170,11 @@ static inline int unifyfs_inode_tree_wrlock(struct unifyfs_inode_tree* tree) * * @param tree inode tree */ -static inline void unifyfs_inode_tree_unlock(struct unifyfs_inode_tree* tree) +static inline +void unifyfs_inode_tree_unlock(struct unifyfs_inode_tree* tree) { - pthread_rwlock_unlock(&tree->rwlock); + ABT_rwlock_unlock(tree->rwlock); } -#endif /* __UNIFYFS_INODE_TREE_H */ +#endif /* UNIFYFS_INODE_TREE_H */ diff --git a/server/src/unifyfs_metadata_mdhim.c b/server/src/unifyfs_metadata_mdhim.c deleted file mode 100644 index bda3a9da1..000000000 --- a/server/src/unifyfs_metadata_mdhim.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright (c) 2020, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2020, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. 
- * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. - * - * Written by: Teng Wang, Adam Moody, Wekuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICNSE for full license text. - */ - -// NOTE: following two lines needed for nftw(), MUST COME FIRST IN FILE -#define _XOPEN_SOURCE 500 -#include - -// common headers -#include "unifyfs_client_rpcs.h" - -// server headers -#include "unifyfs_global.h" -#include "unifyfs_metadata_mdhim.h" - -// MDHIM headers -#include "indexes.h" -#include "mdhim.h" - -#define UNIFYFS_META_DB_NAME unifyfs_db -#define UNIFYFS_META_DB_PATH RUNDIR - -struct mdhim_t* md; - -/* we use two MDHIM indexes: - * 0) for file extents - * 1) for file attributes */ -#define IDX_FILE_EXTENTS (0) -#define IDX_FILE_ATTR (1) -struct index_t* unifyfs_indexes[2]; - - -int unifyfs_key_compare(unifyfs_key_t* a, unifyfs_key_t* b) -{ - assert((NULL != a) && (NULL != b)); - if (a->gfid == b->gfid) { - if (a->offset == b->offset) { - return 0; - } else if (a->offset < b->offset) { - return -1; - } else { - return 1; - } - } else if (a->gfid < b->gfid) { - return -1; - } else { - return 1; - } -} - -/* order keyvals by gfid, then host delegator rank */ -int unifyfs_keyval_compare(const void* a, const void* b) -{ - assert((NULL != a) && (NULL != b)); - - const unifyfs_keyval_t* kv_a = a; - const unifyfs_keyval_t* kv_b = b; - - int gfid_a = kv_a->key.gfid; - int gfid_b = kv_b->key.gfid; - if (gfid_a == gfid_b) { - int rank_a = kv_a->val.delegator_rank; - int rank_b = kv_b->val.delegator_rank; - if (rank_a == rank_b) { - return 0; - } else if (rank_a < rank_b) { - return -1; - } else { - return 1; - } - } else if (gfid_a < gfid_b) { - return -1; - } else { - return 1; - } -} 
- -/* initialize the key-value store */ -int meta_init_store(unifyfs_cfg_t* cfg) -{ - int rc, ratio; - MPI_Comm comm = MPI_COMM_WORLD; - long svr_ratio, range_sz; - struct stat ss; - char db_path[UNIFYFS_MAX_FILENAME] = {0}; - - if (cfg == NULL) { - return -1; - } - - mdhim_options_t* db_opts = mdhim_options_init(); - if (db_opts == NULL) { - return -1; - } - mdhim_options_set_db_type(db_opts, LEVELDB); - mdhim_options_set_db_name(db_opts, UNIFYFS_META_DB_NAME); - mdhim_options_set_key_type(db_opts, MDHIM_UNIFYFS_KEY); - mdhim_options_set_debug_level(db_opts, MLOG_CRIT); - - /* UNIFYFS_META_DB_PATH: root directory for metadata */ - snprintf(db_path, sizeof(db_path), "%s/mdhim", cfg->meta_db_path); - rc = stat(db_path, &ss); - if (rc != 0) { - rc = mkdir(db_path, 0770); - if (rc != 0) { - LOGERR("failed to create MDHIM metadata directory %s", db_path); - return -1; - } - } - mdhim_options_set_db_path(db_opts, strdup(db_path)); - - /* number of metadata servers = - * number of unifyfs servers / UNIFYFS_META_SERVER_RATIO */ - ratio = (int) UNIFYFS_META_SERVER_RATIO; - mdhim_options_set_server_factor(db_opts, ratio); - - /* indices/attributes are striped to servers according - * to config setting for UNIFYFS_META_RANGE_SIZE. 
*/ - range_sz = 0; - rc = configurator_int_val(cfg->meta_range_size, &range_sz); - if (rc != 0) { - return -1; - } - meta_slice_sz = (size_t) range_sz; - mdhim_options_set_max_recs_per_slice(db_opts, (uint64_t)range_sz); - - md = mdhimInit(&comm, db_opts); - - /* index for storing file extent metadata */ - unifyfs_indexes[IDX_FILE_EXTENTS] = md->primary_index; - - /* index for storing file attribute metadata */ - unifyfs_indexes[IDX_FILE_ATTR] = create_global_index(md, - ratio, 1, LEVELDB, MDHIM_INT_KEY, "file_attr"); - - return 0; -} - -void print_fsync_indices(unifyfs_key_t** keys, - unifyfs_val_t** vals, - size_t num_entries) -{ - size_t i; - for (i = 0; i < num_entries; i++) { - LOGDBG("gfid:%d, offset:%lu, addr:%lu, len:%lu, del_id:%d", - keys[i]->gfid, keys[i]->offset, - vals[i]->addr, vals[i]->len, - vals[i]->delegator_rank); - } -} - -static int remove_cb(const char* fpath, const struct stat* sb, - int typeflag, struct FTW* ftwbuf) -{ - int rc = remove(fpath); - if (rc) { - LOGERR("failed to remove(%s)", fpath); - } - return rc; -} - -static int remove_mdhim_db_filetree(char* db_root_path) -{ - LOGDBG("remove MDHIM DB filetree at %s", db_root_path); - return nftw(db_root_path, remove_cb, 64, FTW_DEPTH | FTW_PHYS); -} - - -int meta_sanitize(void) -{ - int rc; - char db_path[UNIFYFS_MAX_FILENAME] = {0}; - - // capture db_path before closing MDHIM - snprintf(db_path, sizeof(db_path), "%s", md->db_opts->db_path); - - mdhimClose(md); - md = NULL; - - // remove the metadata filetree - rc = remove_mdhim_db_filetree(db_path); - if (rc) { - LOGERR("failure during MDHIM file tree removal"); - } - - return UNIFYFS_SUCCESS; -} - -// New API -/* - * - */ -int unifyfs_set_file_attribute( - int set_size, - int set_laminate, - unifyfs_file_attr_t* fattr_ptr) -{ - int rc = UNIFYFS_SUCCESS; - - /* select index for file attributes */ - md->primary_index = unifyfs_indexes[IDX_FILE_ATTR]; - - int gfid = fattr_ptr->gfid; - - /* if we want to preserve some settings, - * we copy 
those fields from attributes - * on the existing entry, if there is one */ - int preserve = (!set_size || !set_laminate); - if (preserve) { - /* lookup existing attributes for the file */ - unifyfs_file_attr_t attr; - int get_rc = unifyfs_get_file_attribute(gfid, &attr); - if (get_rc == UNIFYFS_SUCCESS) { - /* found the attributes for this file, - * if size flag is not set, preserve existing size value */ - if (!set_size) { - fattr_ptr->size = attr.size; - } - - /* if laminate flag is not set, - * preserve existing is_laminated state */ - if (!set_laminate) { - fattr_ptr->is_laminated = attr.is_laminated; - } - } else { - /* otherwise, trying to update attributes for a file that - * we can't find */ - return get_rc; - } - } - - /* insert file attribute for given global file id */ - struct mdhim_brm_t* brm = mdhimPut(md, - &gfid, sizeof(int), - fattr_ptr, sizeof(unifyfs_file_attr_t), - NULL, NULL); - - if (!brm || brm->error) { - rc = (int)UNIFYFS_ERROR_MDHIM; - } - - if (brm) { - mdhim_full_release_msg(brm); - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to insert attributes for gfid=%d", gfid); - } - return rc; -} - -/* - * - */ -int unifyfs_set_file_attributes(int num_entries, - fattr_key_t** keys, int* key_lens, - unifyfs_file_attr_t** fattr_ptr, int* val_lens) -{ - int rc = UNIFYFS_SUCCESS; - - /* select index for file attributes */ - md->primary_index = unifyfs_indexes[IDX_FILE_ATTR]; - - /* put list of key/value pairs */ - struct mdhim_brm_t* brm = mdhimBPut(md, - (void**)keys, key_lens, - (void**)fattr_ptr, val_lens, - num_entries, NULL, NULL); - - /* check for errors and free resources */ - if (!brm) { - rc = (int)UNIFYFS_ERROR_MDHIM; - } else { - /* step through linked list of messages, - * scan for any error and free messages */ - struct mdhim_brm_t* brmp = brm; - while (brmp) { - /* check current item for error */ - if (brmp->error) { - LOGERR("MDHIM bulk put error=%d", brmp->error); - rc = (int)UNIFYFS_ERROR_MDHIM; - } - - /* record pointer to 
current item, - * advance loop pointer to next item in list, - * free resources for current item */ - brm = brmp; - brmp = brmp->next; - mdhim_full_release_msg(brm); - } - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to bulk insert file attributes"); - } - return rc; -} - -/* given a global file id, lookup and return file attributes */ -int unifyfs_get_file_attribute( - int gfid, - unifyfs_file_attr_t* attr) -{ - int rc = UNIFYFS_SUCCESS; - - /* select index holding file attributes, - * execute lookup for given file id */ - md->primary_index = unifyfs_indexes[IDX_FILE_ATTR]; - struct mdhim_bgetrm_t* bgrm = mdhimGet(md, md->primary_index, - &gfid, sizeof(int), MDHIM_GET_EQ); - - if (!bgrm || bgrm->error) { - /* failed to find info for this file id */ - rc = (int)UNIFYFS_ERROR_MDHIM; - } else { - /* copy file attribute from value into output parameter */ - unifyfs_file_attr_t* ptr = (unifyfs_file_attr_t*)bgrm->values[0]; - memcpy(attr, ptr, sizeof(unifyfs_file_attr_t)); - } - - /* free resources returned from lookup */ - if (bgrm) { - mdhim_full_release_msg(bgrm); - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to retrieve attributes for gfid=%d", gfid); - } - return rc; -} - -/* given a global file id, delete file attributes */ -int unifyfs_delete_file_attribute( - int gfid) -{ - int rc = UNIFYFS_SUCCESS; - - /* select index holding file attributes, - * delete entry for given file id */ - md->primary_index = unifyfs_indexes[IDX_FILE_ATTR]; - struct mdhim_brm_t* brm = mdhimDelete(md, md->primary_index, - &gfid, sizeof(int)); - - /* check for errors and free resources */ - if (!brm) { - rc = (int)UNIFYFS_ERROR_MDHIM; - } else { - /* step through linked list of messages, - * scan for any error and free messages */ - struct mdhim_brm_t* brmp = brm; - while (brmp) { - /* check current item for error */ - if (brmp->error) { - LOGERR("MDHIM delete error=%d", brmp->error); - rc = (int)UNIFYFS_ERROR_MDHIM; - } - - /* record pointer to current item, - * advance 
loop pointer to next item in list, - * free resources for current item */ - brm = brmp; - brmp = brmp->next; - mdhim_full_release_msg(brm); - } - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to delete attributes for gfid=%d", gfid); - } - return rc; -} - -/* - * - */ -int unifyfs_get_file_extents(int num_keys, unifyfs_key_t** keys, - int* key_lens, int* num_values, - unifyfs_keyval_t** keyvals) -{ - /* - * This is using a modified version of mdhim. The function will return all - * key-value pairs within the range of the key tuple. - * We need to re-evaluate this function to use different key-value stores. - */ - int rc = UNIFYFS_SUCCESS; - - /* initialize output values */ - *num_values = 0; - *keyvals = NULL; - - /* select index for file extents */ - md->primary_index = unifyfs_indexes[IDX_FILE_EXTENTS]; - - /* execute range query */ - struct mdhim_bgetrm_t* bkvlist = mdhimBGet(md, md->primary_index, - (void**)keys, key_lens, num_keys, MDHIM_RANGE_BGET); - - /* iterate over each item in list, check for errors - * and sum up total number of key/value pairs we got back */ - size_t tot_num = 0; - struct mdhim_bgetrm_t* ptr = bkvlist; - while (ptr) { - /* check that we don't have an error condition */ - if (ptr->error) { - /* hit an error */ - LOGERR("MDHIM range query error=%d", ptr->error); - return (int)UNIFYFS_ERROR_MDHIM; - } - - /* total up number of key/values returned */ - tot_num += (size_t) ptr->num_keys; - - /* get pointer to next item in the list */ - ptr = ptr->next; - } - - /* allocate memory to copy key/value data */ - unifyfs_keyval_t* kvs = (unifyfs_keyval_t*) calloc( - tot_num, sizeof(unifyfs_keyval_t)); - if (NULL == kvs) { - LOGERR("failed to allocate keyvals"); - return ENOMEM; - } - - /* iterate over list and copy each key/value into output array */ - ptr = bkvlist; - unifyfs_keyval_t* kviter = kvs; - while (ptr) { - /* iterate over key/value in list element */ - int i; - for (i = 0; i < ptr->num_keys; i++) { - /* get pointer to current key 
and value */ - unifyfs_key_t* tmp_key = (unifyfs_key_t*)ptr->keys[i]; - unifyfs_val_t* tmp_val = (unifyfs_val_t*)ptr->values[i]; - - /* copy contents over to output array */ - memcpy(&(kviter->key), tmp_key, sizeof(unifyfs_key_t)); - memcpy(&(kviter->val), tmp_val, sizeof(unifyfs_val_t)); - - /* bump up to next element in output array */ - kviter++; - } - - /* get pointer to next item in the list */ - struct mdhim_bgetrm_t* next = ptr->next; - - /* release resources for the curren item */ - mdhim_full_release_msg(ptr); - ptr = next; - } - - /* set output values */ - *num_values = tot_num; - *keyvals = kvs; - - return rc; -} - -/* - * - */ -int unifyfs_set_file_extents(int num_entries, - unifyfs_key_t** keys, int* key_lens, - unifyfs_val_t** vals, int* val_lens) -{ - int rc = UNIFYFS_SUCCESS; - - /* select index for file extents */ - md->primary_index = unifyfs_indexes[IDX_FILE_EXTENTS]; - - /* put list of key/value pairs */ - struct mdhim_brm_t* brm = mdhimBPut(md, - (void**)(keys), key_lens, - (void**)(vals), val_lens, - num_entries, NULL, NULL); - - /* check for errors and free resources */ - if (!brm) { - rc = (int)UNIFYFS_ERROR_MDHIM; - } else { - /* step through linked list of messages, - * scan for any error and free messages */ - struct mdhim_brm_t* brmp = brm; - while (brmp) { - /* check current item for error */ - if (brmp->error) { - LOGERR("MDHIM bulk put error=%d", brmp->error); - rc = (int)UNIFYFS_ERROR_MDHIM; - } - - /* record pointer to current item, - * advance loop pointer to next item in list, - * free resources for current item */ - brm = brmp; - brmp = brmp->next; - mdhim_full_release_msg(brm); - } - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to bulk insert file extents"); - } - return rc; -} - -/* delete the listed keys from the file extents */ -int unifyfs_delete_file_extents( - int num_entries, /* number of items in keys list */ - unifyfs_key_t** keys, /* list of keys to be deleted */ - int* key_lens) /* list of byte sizes for keys 
list items */ -{ - /* assume we'll succeed */ - int rc = UNIFYFS_SUCCESS; - - /* select index for file extents */ - md->primary_index = unifyfs_indexes[IDX_FILE_EXTENTS]; - - /* delete list of key/value pairs */ - struct mdhim_brm_t* brm = mdhimBDelete(md, md->primary_index, - (void**)(keys), key_lens, num_entries); - - /* check for errors and free resources */ - if (!brm) { - rc = (int)UNIFYFS_ERROR_MDHIM; - } else { - /* step through linked list of messages, - * scan for any error and free messages */ - struct mdhim_brm_t* brmp = brm; - while (brmp) { - /* check current item for error */ - if (brmp->error) { - LOGERR("MDHIM bulk delete error=%d", brmp->error); - rc = (int)UNIFYFS_ERROR_MDHIM; - } - - /* record pointer to current item, - * advance loop pointer to next item in list, - * free resources for current item */ - brm = brmp; - brmp = brmp->next; - mdhim_full_release_msg(brm); - } - } - - if (rc != UNIFYFS_SUCCESS) { - LOGERR("failed to bulk delete file extents"); - } - return rc; -} diff --git a/server/src/unifyfs_metadata_mdhim.h b/server/src/unifyfs_metadata_mdhim.h deleted file mode 100644 index e01f6ddca..000000000 --- a/server/src/unifyfs_metadata_mdhim.h +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2020, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * - * Copyright 2020, UT-Battelle, LLC. - * - * LLNL-CODE-741539 - * All rights reserved. - * - * This is the license for UnifyFS. - * For details, see https://github.com/LLNL/UnifyFS. - * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. - */ - -/* - * Copyright (c) 2017, Lawrence Livermore National Security, LLC. - * Produced at the Lawrence Livermore National Laboratory. - * Copyright (c) 2017, Florida State University. Contributions from - * the Computer Architecture and Systems Research Laboratory (CASTL) - * at the Department of Computer Science. 
- * - * Written by: Teng Wang, Adam Moody, Weikuan Yu, Kento Sato, Kathryn Mohror - * LLNL-CODE-728877. All rights reserved. - * - * This file is part of burstfs. - * For details, see https://github.com/llnl/burstfs - * Please read https://github.com/llnl/burstfs/LICENSE for full license text. - */ - -#ifndef UNIFYFS_METADATA_MDHIM_H -#define UNIFYFS_METADATA_MDHIM_H - -#include "unifyfs_configurator.h" -#include "unifyfs_log.h" -#include "unifyfs_meta.h" - -/* number of metadata servers = - * number of unifyfs servers / UNIFYFS_META_SERVER_RATIO */ -#define UNIFYFS_META_SERVER_RATIO 1 - -/* max count of remote read requests (per-server) */ -#define UNIFYFS_MAX_META_PER_SEND (4 * KIB) - -/* max count of metadata slices for a single data extent */ -#define UNIFYFS_MAX_META_SPLIT_COUNT (4 * KIB) - -/* NOTE: The maximum size of an individual read operation is - * (UNIFYFS_MAX_META_SPLIT_COUNT * UNIFYFS_META_DEFAULT_SLICE_SZ) */ - - -/* Key for file attributes */ -typedef int fattr_key_t; - -/** - * Key for a file extent - */ -typedef struct { - /** global file id */ - int gfid; - /** logical file offset */ - size_t offset; -} unifyfs_key_t; - -#define UNIFYFS_KEY_SZ (sizeof(unifyfs_key_t)) -#define UNIFYFS_KEY_FID(keyp) (((unifyfs_key_t*)keyp)->gfid) -#define UNIFYFS_KEY_OFF(keyp) (((unifyfs_key_t*)keyp)->offset) - -typedef struct { - size_t addr; /* data offset in server */ - size_t len; /* length of data at addr */ - int app_id; /* application id in server */ - int rank; /* client id in server */ - int delegator_rank; /* delegator/server rank hosting data */ -} unifyfs_val_t; - -#define UNIFYFS_VAL_SZ (sizeof(unifyfs_val_t)) -#define UNIFYFS_VAL_ADDR(valp) (((unifyfs_val_t*)valp)->addr) -#define UNIFYFS_VAL_LEN(valp) (((unifyfs_val_t*)valp)->len) - -/** - * key-value tuple for a file extent - */ -typedef struct { - /** key */ - unifyfs_key_t key; - /** value */ - unifyfs_val_t val; -} unifyfs_keyval_t; - -int unifyfs_key_compare(unifyfs_key_t* a, unifyfs_key_t* b); 
-int unifyfs_keyval_compare(const void* a, const void* b); - -/* return number of slice ranges needed to cover range */ -size_t meta_num_slices(size_t offset, size_t length); - -int meta_sanitize(void); -int meta_init_store(unifyfs_cfg_t* cfg); - -void print_fsync_indices(unifyfs_key_t** unifyfs_keys, - unifyfs_val_t** unifyfs_vals, size_t num_entries); - -/** - * Retrieve a File attribute from the KV-Store. - * - * @param [in] gfid - * @param[out] *ptr_attr_val - * @return UNIFYFS_SUCCESS on success - */ -int unifyfs_get_file_attribute(int gfid, - unifyfs_file_attr_t* ptr_attr_val); - -/** - * Delete file attribute from the KV-Store. - * - * @param [in] gfid - * @return UNIFYFS_SUCCESS on success - */ -int unifyfs_delete_file_attribute(int gfid); - -/** - * Store a File attribute to the KV-Store. - * - * @param[in] size_flag - * @param[in] laminate_flag - * @param[in] *ptr_attr_val - * @return UNIFYFS_SUCCESS on success - */ -int unifyfs_set_file_attribute( - int size_flag, - int laminate_flag, - unifyfs_file_attr_t* ptr_attr_val); - -/** - * Store File attributes to the KV-Store. - * - * @param[in] num_entries number of key value pairs to store - * @param[in] keys array storing the keys - * @param[in] key_lens array with the length of the elements in \p keys - * @param[in] vals array with the values - * @param[in] val_lens array with the length of the elements in \p vals - */ -int unifyfs_set_file_attributes(int num_entries, - fattr_key_t** keys, int* key_lens, - unifyfs_file_attr_t** vals, int* val_lens); - -/** - * Retrieve File extents from the KV-Store. 
- * - * @param[in] num_keys number of keys - * @param[in] keys array of keys to retrieve the values for - * @param[in] key_lens array with the length of the key in \p keys - * @param[out] num_values number of values in the keyval array - * @param[out] keyval array containing the key-value tuples found - * @return UNIFYFS_SUCCESS on success - */ -int unifyfs_get_file_extents(int num_keys, - unifyfs_key_t** keys, int* key_lens, - int* num_values, unifyfs_keyval_t** keyval); - -/** - * Delete File extents from the KV-Store. - * - * @param[in] num_entries number of key value pairs to delete - * @param[in] keys array storing the keys - * @param[in] key_lens array with the length of the elements in \p keys - */ -int unifyfs_delete_file_extents(int num_entries, - unifyfs_key_t** keys, int* key_lens); - -/** - * Store File extents in the KV-Store. - * - * @param [in] num_entries number of key value pairs to store - * @param[in] keys array storing the keys - * @param[in] key_lens array with the length of the elements in \p keys - * @param[in] vals array with the values - * @param[in] val_lens array with the length of the elements in \p vals - * @return UNIFYFS_SUCCESS on success - */ -int unifyfs_set_file_extents(int num_entries, unifyfs_key_t** keys, - int* key_lens, unifyfs_val_t** vals, - int* val_lens); - - - -static inline -unifyfs_key_t** alloc_key_array(int elems) -{ - int size = elems * (sizeof(unifyfs_key_t*) + sizeof(unifyfs_key_t)); - - void* mem_block = calloc(size, sizeof(char)); - - unifyfs_key_t** array_ptr = mem_block; - unifyfs_key_t* key_ptr = (unifyfs_key_t*)(array_ptr + elems); - - for (int i = 0; i < elems; i++) { - array_ptr[i] = &key_ptr[i]; - } - - return (unifyfs_key_t**)mem_block; -} - -static inline -unifyfs_val_t** alloc_value_array(int elems) -{ - int size = elems * (sizeof(unifyfs_val_t*) + sizeof(unifyfs_val_t)); - - void* mem_block = calloc(size, sizeof(char)); - - unifyfs_val_t** array_ptr = mem_block; - unifyfs_val_t* key_ptr = 
(unifyfs_val_t*)(array_ptr + elems); - - for (int i = 0; i < elems; i++) { - array_ptr[i] = &key_ptr[i]; - } - - return (unifyfs_val_t**)mem_block; -} - -static inline -void free_key_array(unifyfs_key_t** array) -{ - free(array); -} - -static inline -void free_value_array(unifyfs_val_t** array) -{ - free(array); -} - -static inline -void debug_log_key_val(const char* ctx, - unifyfs_key_t* key, - unifyfs_val_t* val) -{ - if ((key != NULL) && (val != NULL)) { - LOGDBG("@%s - key(gfid=%d, offset=%lu), " - "val(del=%d, len=%lu, addr=%lu, app=%d, rank=%d)", - ctx, key->gfid, key->offset, - val->delegator_rank, val->len, val->addr, - val->app_id, val->rank); - } else if (key != NULL) { - LOGDBG("@%s - key(gfid=%d, offset=%lu)", - ctx, key->gfid, key->offset); - } -} - - -#endif // UNIFYFS_METADATA_MDHIM_H diff --git a/server/src/unifyfs_p2p_rpc.c b/server/src/unifyfs_p2p_rpc.c index 5c918aa9d..e2927c00b 100644 --- a/server/src/unifyfs_p2p_rpc.c +++ b/server/src/unifyfs_p2p_rpc.c @@ -28,7 +28,7 @@ int hash_gfid_to_server(int gfid) } /* helper method to initialize peer request rpc handle */ -int get_p2p_request_handle(hg_id_t request_hgid, +int init_p2p_request_handle(hg_id_t request_hgid, int peer_rank, p2p_request* req) { @@ -82,6 +82,7 @@ int wait_for_p2p_request(p2p_request* req) if (hret != HG_SUCCESS) { LOGERR("wait on p2p request(%p) failed - %s", req, HG_Error_to_string(hret)); + //margo_state_dump(unifyfsd_rpc_context->svr_mid, "-", 0, NULL); rc = UNIFYFS_ERROR_MARGO; } @@ -119,7 +120,7 @@ int invoke_chunk_read_request_rpc(int dst_srvr_rank, /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.chunk_read_request_id; - int rc = get_p2p_request_handle(req_hgid, dst_srvr_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, dst_srvr_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -185,8 +186,8 @@ static void chunk_read_request_rpc(hg_handle_t handle) hg_return_t hret; /* get input params */ - 
chunk_read_request_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + chunk_read_request_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -263,7 +264,7 @@ int invoke_chunk_read_response_rpc(server_chunk_reads_t* scr) /* forward response to requesting server */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.chunk_read_response_id; - int rc = get_p2p_request_handle(req_hgid, dst_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, dst_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -423,7 +424,7 @@ int unifyfs_invoke_add_extents_rpc(int gfid, /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.extent_add_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -488,8 +489,8 @@ static void add_extents_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - add_extents_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + add_extents_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -503,7 +504,7 @@ static void add_extents_rpc(hg_handle_t handle) /* allocate memory for extents */ void* extents_buf = pull_margo_bulk_buffer(handle, in->extents, - bulk_sz, NULL); + bulk_sz, NULL); if (NULL == extents_buf) { LOGERR("failed to get bulk extents"); ret = UNIFYFS_ERROR_MARGO; @@ -607,7 +608,7 @@ int unifyfs_invoke_find_extents_rpc(int gfid, p2p_request preq; margo_instance_id mid = unifyfsd_rpc_context->svr_mid; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.extent_lookup_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); if 
(rc != UNIFYFS_SUCCESS) { return rc; } @@ -688,8 +689,8 @@ static void find_extents_rpc(hg_handle_t handle) int32_t ret; /* get input params */ - find_extents_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + find_extents_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -780,64 +781,91 @@ int unifyfs_invoke_metaget_rpc(int gfid, /* use cached attributes if within threshold */ struct timespec tp = {0}; clock_gettime(CLOCK_REALTIME, &tp); - time_t expire = attrs->ctime.tv_sec + UNIFYFS_METADATA_CACHE_SECONDS; + time_t expire = attrs->last_update + UNIFYFS_METADATA_CACHE_SECONDS; if (tp.tv_sec <= expire) { LOGINFO("using cached attributes for gfid=%d", gfid); return UNIFYFS_SUCCESS; } else { - LOGINFO("cached attributes for gfid=%d have expired", gfid); + LOGINFO("cached attributes for gfid=%d have expired " + "(now=%d, expiration=%d)", gfid, tp.tv_sec, expire); } } else if (rc == ENOENT) { - /* metaget above failed with ENOENT, need to create inode */ + /* local metaget gave ENOENT, need to create inode if file exists */ need_local_metadata = 1; } - /* forward request to file owner */ - p2p_request preq; - hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.metaget_id; - rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); - if (rc != UNIFYFS_SUCCESS) { + int ret = UNIFYFS_SUCCESS; + rc = add_pending_metaget(gfid); + if (EEXIST == rc) { + /* wait for pending to finish */ + do { + LOGDBG("waiting for pending metaget gfid=%d", gfid); + usleep(10000); /* sleep 10 ms */ + } while (check_pending_metaget(gfid)); + + /* should have local copy now if file existed */ + rc = sm_get_fileattr(gfid, attrs); return rc; - } + } else { + LOGDBG("added pending metaget gfid=%d", gfid); - /* fill rpc input struct and forward request */ - metaget_in_t in; - in.gfid = (int32_t) gfid; - rc = forward_p2p_request((void*)&in, &preq); - if (rc != UNIFYFS_SUCCESS) 
{ - margo_destroy(preq.handle); - return rc; - } + /* forward request to file owner */ + p2p_request preq; + hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.metaget_id; + rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_metaget; + } - /* wait for request completion */ - rc = wait_for_p2p_request(&preq); - if (rc != UNIFYFS_SUCCESS) { - margo_destroy(preq.handle); - return rc; - } + /* fill rpc input struct and forward request */ + metaget_in_t in; + in.gfid = (int32_t) gfid; + rc = forward_p2p_request((void*)&in, &preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_metaget; + } - /* get the output of the rpc */ - int ret; - metaget_out_t out; - hg_return_t hret = margo_get_output(preq.handle, &out); - if (hret != HG_SUCCESS) { - LOGERR("margo_get_output() failed - %s", HG_Error_to_string(hret)); - ret = UNIFYFS_ERROR_MARGO; - } else { - /* set return value */ - ret = out.ret; - if (ret == UNIFYFS_SUCCESS) { - *attrs = out.attr; - if (out.attr.filename != NULL) { - attrs->filename = strdup(out.attr.filename); - } - if (need_local_metadata) { - sm_set_fileattr(gfid, UNIFYFS_FILE_ATTR_OP_CREATE, attrs); + /* wait for request completion */ + rc = wait_for_p2p_request(&preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_metaget; + } + + /* get the output of the rpc */ + metaget_out_t out; + hg_return_t hret = margo_get_output(preq.handle, &out); + if (hret != HG_SUCCESS) { + LOGERR("margo_get_output() failed - %s", HG_Error_to_string(hret)); + ret = UNIFYFS_ERROR_MARGO; + } else { + /* set return value */ + ret = out.ret; + if (ret == UNIFYFS_SUCCESS) { + *attrs = out.attr; + if (out.attr.filename != NULL) { + attrs->filename = strdup(out.attr.filename); + } + if (need_local_metadata) { + sm_set_fileattr(gfid, UNIFYFS_FILE_ATTR_OP_CREATE, attrs); + } else { + sm_set_fileattr(gfid, UNIFYFS_FILE_ATTR_OP_UTIME, attrs); + } } + margo_free_output(preq.handle, &out); 
} - margo_free_output(preq.handle, &out); + +clear_pending_metaget: + LOGDBG("clearing pending metaget gfid=%d", gfid); + rc = clear_pending_metaget(gfid); + if (rc != UNIFYFS_SUCCESS) { + LOGWARN("failed to clear pending metaget for gfid=%d", gfid); + } + + margo_destroy(preq.handle); } - margo_destroy(preq.handle); return ret; } @@ -850,8 +878,8 @@ static void metaget_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - metaget_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + metaget_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -908,62 +936,100 @@ int unifyfs_invoke_filesize_rpc(int gfid, if (NULL == filesize) { return EINVAL; } + *filesize = 0; int owner_rank = hash_gfid_to_server(gfid); + int need_local_metadata = 0; - /* do local inode metadata lookup to check for laminated */ unifyfs_file_attr_t attrs; + memset(&attrs, 0, sizeof(attrs)); + + /* do local inode metadata lookup to check for laminated */ int rc = sm_get_fileattr(gfid, &attrs); - if ((rc == UNIFYFS_SUCCESS) && (attrs.is_laminated)) { - /* if laminated, we already have final metadata stored locally */ - *filesize = (size_t) attrs.size; - return UNIFYFS_SUCCESS; - } if (owner_rank == glb_pmi_rank) { *filesize = (size_t) attrs.size; return rc; - } + } else if (rc == UNIFYFS_SUCCESS) { + if (attrs.is_laminated) { + /* if laminated, we already have final metadata stored locally */ + *filesize = (size_t) attrs.size; + return UNIFYFS_SUCCESS; + } - /* forward request to file owner */ - p2p_request preq; - hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.filesize_id; - rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); - if (rc != UNIFYFS_SUCCESS) { - return rc; + /* NOTE: unlike metaget above, we don't use cached metadata + * for explicit file size lookups */ + } else if (rc == ENOENT) { + /* local metaget gave ENOENT, need to create inode if 
file exists */ + need_local_metadata = 1; } - /* fill rpc input struct and forward request */ - filesize_in_t in; - in.gfid = (int32_t)gfid; - rc = forward_p2p_request((void*)&in, &preq); - if (rc != UNIFYFS_SUCCESS) { - margo_destroy(preq.handle); + int ret = UNIFYFS_SUCCESS; + rc = add_pending_metaget(gfid); + if (EEXIST == rc) { + /* wait for pending to finish */ + do { + usleep(10000); /* sleep 10 ms */ + } while (check_pending_metaget(gfid)); + + /* should have local copy now if file existed */ + rc = sm_get_fileattr(gfid, &attrs); + *filesize = (size_t) attrs.size; return rc; - } + } else { + /* forward request to file owner */ + p2p_request preq; + hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.metaget_id; + rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_fileattr; + } - /* wait for request completion */ - rc = wait_for_p2p_request(&preq); - if (rc != UNIFYFS_SUCCESS) { - margo_destroy(preq.handle); - return rc; - } + /* fill rpc input struct and forward request */ + metaget_in_t in; + in.gfid = (int32_t) gfid; + rc = forward_p2p_request((void*)&in, &preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_fileattr; + } - /* get the output of the rpc */ - int ret; - filesize_out_t out; - hg_return_t hret = margo_get_output(preq.handle, &out); - if (hret != HG_SUCCESS) { - LOGERR("margo_get_output() failed - %s", HG_Error_to_string(hret)); - ret = UNIFYFS_ERROR_MARGO; - } else { - /* set return value */ - ret = out.ret; - if (ret == UNIFYFS_SUCCESS) { - *filesize = (size_t) out.filesize; + /* wait for request completion */ + rc = wait_for_p2p_request(&preq); + if (rc != UNIFYFS_SUCCESS) { + ret = rc; + goto clear_pending_fileattr; } - margo_free_output(preq.handle, &out); + + /* get the output of the rpc */ + metaget_out_t out; + hg_return_t hret = margo_get_output(preq.handle, &out); + if (hret != HG_SUCCESS) { + LOGERR("margo_get_output() failed - %s", 
HG_Error_to_string(hret)); + ret = UNIFYFS_ERROR_MARGO; + } else { + /* set return value */ + ret = out.ret; + if (ret == UNIFYFS_SUCCESS) { + attrs = out.attr; + *filesize = (size_t) attrs.size; + if (need_local_metadata) { + sm_set_fileattr(gfid, UNIFYFS_FILE_ATTR_OP_CREATE, &attrs); + } else { + sm_set_fileattr(gfid, UNIFYFS_FILE_ATTR_OP_UTIME, &attrs); + } + } + margo_free_output(preq.handle, &out); + } + +clear_pending_fileattr: + rc = clear_pending_metaget(gfid); + if (rc != UNIFYFS_SUCCESS) { + LOGWARN("failed to clear pending metaget for gfid=%d", gfid); + } + + margo_destroy(preq.handle); } - margo_destroy(preq.handle); return ret; } @@ -976,8 +1042,8 @@ static void filesize_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - filesize_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + filesize_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -1050,7 +1116,7 @@ int unifyfs_invoke_metaset_rpc(int gfid, /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.metaset_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -1097,8 +1163,8 @@ static void metaset_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - metaset_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + metaset_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -1160,7 +1226,7 @@ int unifyfs_invoke_laminate_rpc(int gfid) /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.laminate_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = 
init_p2p_request_handle(req_hgid, owner_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -1205,8 +1271,8 @@ static void laminate_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - laminate_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + laminate_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -1272,7 +1338,7 @@ int unifyfs_invoke_transfer_rpc(int client_app, /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.transfer_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -1324,8 +1390,8 @@ static void transfer_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - transfer_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + transfer_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -1386,7 +1452,7 @@ int unifyfs_invoke_truncate_rpc(int gfid, /* forward request to file owner */ p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.truncate_id; - int rc = get_p2p_request_handle(req_hgid, owner_rank, &preq); + int rc = init_p2p_request_handle(req_hgid, owner_rank, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -1433,8 +1499,8 @@ static void truncate_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - truncate_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); + truncate_in_t* in = calloc(1, sizeof(*in)); + server_rpc_req_t* req = calloc(1, sizeof(*req)); if ((NULL == in) || (NULL == req)) { ret = ENOMEM; } else { @@ -1487,7 +1553,7 @@ int unifyfs_invoke_server_pid_rpc(void) /* forward pid to server rank 0 */ 
p2p_request preq; hg_id_t req_hgid = unifyfsd_rpc_context->rpcs.server_pid_id; - int rc = get_p2p_request_handle(req_hgid, 0, &preq); + int rc = init_p2p_request_handle(req_hgid, 0, &preq); if (rc != UNIFYFS_SUCCESS) { return rc; } @@ -1533,47 +1599,25 @@ static void server_pid_rpc(hg_handle_t handle) int ret = UNIFYFS_SUCCESS; /* get input params */ - server_pid_in_t* in = malloc(sizeof(*in)); - server_rpc_req_t* req = malloc(sizeof(*req)); - if ((NULL == in) || (NULL == req)) { - ret = ENOMEM; + server_pid_in_t in; + hg_return_t hret = margo_get_input(handle, &in); + if (hret != HG_SUCCESS) { + LOGERR("margo_get_input() failed"); + ret = UNIFYFS_ERROR_MARGO; } else { - hg_return_t hret = margo_get_input(handle, in); - if (hret != HG_SUCCESS) { - LOGERR("margo_get_input() failed"); - ret = UNIFYFS_ERROR_MARGO; - } else { - req->req_type = UNIFYFS_SERVER_RPC_PID_REPORT; - req->handle = handle; - req->input = (void*) in; - req->bulk_buf = NULL; - req->bulk_sz = 0; - ret = sm_submit_service_request(req); - if (ret != UNIFYFS_SUCCESS) { - margo_free_input(handle, in); - } - } + ret = unifyfs_report_server_pid(in.rank, in.pid); + margo_free_input(handle, &in); } - /* if we hit an error during request submission, respond with the error */ - if (ret != UNIFYFS_SUCCESS) { - if (NULL != in) { - free(in); - } - if (NULL != req) { - free(req); - } - - /* return to caller */ - server_pid_out_t out; - out.ret = (int32_t) ret; - hg_return_t hret = margo_respond(handle, &out); - if (hret != HG_SUCCESS) { - LOGERR("margo_respond() failed"); - } - - /* free margo resources */ - margo_destroy(handle); + /* return to caller */ + server_pid_out_t out; + out.ret = (int32_t) ret; + hret = margo_respond(handle, &out); + if (hret != HG_SUCCESS) { + LOGERR("margo_respond() failed"); } + + /* free margo resources */ + margo_destroy(handle); } DEFINE_MARGO_RPC_HANDLER(server_pid_rpc) diff --git a/server/src/unifyfs_p2p_rpc.h b/server/src/unifyfs_p2p_rpc.h index 50e6a41c0..0f764cff7 100644 
--- a/server/src/unifyfs_p2p_rpc.h +++ b/server/src/unifyfs_p2p_rpc.h @@ -34,9 +34,9 @@ typedef struct { } p2p_request; /* helper method to initialize peer request rpc handle */ -int get_p2p_request_handle(hg_id_t request_hgid, - int peer_rank, - p2p_request* req); +int init_p2p_request_handle(hg_id_t request_hgid, + int peer_rank, + p2p_request* req); /* helper method to forward peer rpc request */ int forward_p2p_request(void* input_ptr, diff --git a/server/src/unifyfs_request_manager.c b/server/src/unifyfs_request_manager.c index 708d2df73..704e6aa57 100644 --- a/server/src/unifyfs_request_manager.c +++ b/server/src/unifyfs_request_manager.c @@ -32,7 +32,6 @@ // server components #include "unifyfs_inode_tree.h" -#include "unifyfs_metadata_mdhim.h" #include "unifyfs_request_manager.h" #include "unifyfs_service_manager.h" @@ -161,6 +160,7 @@ reqmgr_thrd_t* unifyfs_rm_thrd_create(int app_id, int client_id) thrd_ctrl->client_id = client_id; /* initialize flow control flags */ + thrd_ctrl->attached = 0; thrd_ctrl->exit_flag = 0; thrd_ctrl->exited = 0; thrd_ctrl->waiting_for_work = 0; @@ -287,127 +287,6 @@ static void signal_new_responses(reqmgr_thrd_t* reqmgr) } } -/* issue remote chunk read requests for extent chunks - * listed within keyvals */ -int rm_create_chunk_requests(reqmgr_thrd_t* thrd_ctrl, - server_read_req_t* rdreq, - int num_vals, - unifyfs_keyval_t* keyvals) -{ - LOGDBG("creating chunk requests for rdreq %d", rdreq->req_ndx); - - /* allocate read request structures */ - chunk_read_req_t* all_chunk_reads = (chunk_read_req_t*) - calloc((size_t)num_vals, sizeof(chunk_read_req_t)); - if (NULL == all_chunk_reads) { - LOGERR("failed to allocate chunk-reads array"); - return ENOMEM; - } - rdreq->chunks = all_chunk_reads; - - /* iterate over write index values and create read requests - * for each one, also count up number of servers that we'll - * forward read requests to */ - int i; - int prev_del = -1; - int num_del = 0; - for (i = 0; i < num_vals; i++) { - 
/* get target server for this request */ - int curr_del = keyvals[i].val.delegator_rank; - - /* if target server is different from last target, - * increment our server count */ - if ((prev_del == -1) || (curr_del != prev_del)) { - num_del++; - } - prev_del = curr_del; - - /* get pointer to next read request structure */ - debug_log_key_val(__func__, &keyvals[i].key, &keyvals[i].val); - chunk_read_req_t* chk = all_chunk_reads + i; - - /* fill in chunk read request */ - chk->gfid = keyvals[i].key.gfid; - chk->nbytes = keyvals[i].val.len; - chk->offset = keyvals[i].key.offset; - chk->log_offset = keyvals[i].val.addr; - chk->log_app_id = keyvals[i].val.app_id; - chk->log_client_id = keyvals[i].val.rank; - } - - /* allocate per-delgator chunk-reads */ - int num_dels = num_del; - rdreq->num_server_reads = num_dels; - rdreq->remote_reads = (server_chunk_reads_t*) - calloc((size_t)num_dels, sizeof(server_chunk_reads_t)); - if (NULL == rdreq->remote_reads) { - LOGERR("failed to allocate remote-reads array"); - return ENOMEM; - } - - /* get pointer to start of chunk read request array */ - server_chunk_reads_t* reads = rdreq->remote_reads; - - /* iterate over write index values again and now create - * per-server chunk-reads info, for each server - * that we'll request data from, this totals up the number - * of read requests and total read data size from that - * server */ - prev_del = -1; - size_t del_data_sz = 0; - for (i = 0; i < num_vals; i++) { - /* get target server for this request */ - int curr_del = keyvals[i].val.delegator_rank; - - /* if target server is different from last target, - * close out the total number of bytes for the last - * server, note this assumes our write index values are - * sorted by server rank */ - if ((prev_del != -1) && (curr_del != prev_del)) { - /* record total data for previous server */ - reads->total_sz = del_data_sz; - - /* advance to read request for next server */ - reads += 1; - - /* reset our running tally of bytes to 0 */ - 
del_data_sz = 0; - } - prev_del = curr_del; - - /* update total read data size for current server */ - del_data_sz += keyvals[i].val.len; - - /* if this is the first read request for this server, - * initialize fields on the per-server read request - * structure */ - if (0 == reads->num_chunks) { - reads->rank = curr_del; - reads->rdreq_id = rdreq->req_ndx; - reads->reqs = all_chunk_reads + i; - reads->resp = NULL; - } - - /* increment number of read requests we're sending - * to this server */ - reads->num_chunks++; - } - - /* record total data size for final server (if any), - * would have missed doing this in the above loop */ - if (num_vals > 0) { - reads->total_sz = del_data_sz; - } - - /* mark request as ready to be started */ - rdreq->status = READREQ_READY; - - /* wake up the request manager thread for the requesting client */ - signal_new_requests(thrd_ctrl); - - return UNIFYFS_SUCCESS; -} - int rm_submit_read_request(server_read_req_t* req) { int ret = UNIFYFS_SUCCESS; @@ -1072,6 +951,8 @@ static int process_attach_rpc(reqmgr_thrd_t* reqmgr, in->meta_size); if (ret != UNIFYFS_SUCCESS) { LOGERR("attach_app_client() failed"); + } else { + reqmgr->attached = 1; } } else { LOGERR("client not found (app_id=%d, client_id=%d)", @@ -1151,21 +1032,21 @@ static int process_fsync_rpc(reqmgr_thrd_t* reqmgr, .app_id = reqmgr->app_id, .client_id = reqmgr->client_id, }; - ret = unifyfs_fops_fsync(&ctx, gfid); + ret = unifyfs_fops_fsync(&ctx, gfid, req); if (ret != UNIFYFS_SUCCESS) { LOGERR("unifyfs_fops_fsync() failed"); - } - /* send rpc response */ - unifyfs_fsync_out_t out; - out.ret = (int32_t) ret; - hg_return_t hret = margo_respond(req->handle, &out); - if (hret != HG_SUCCESS) { - LOGERR("margo_respond() failed"); - } + /* send rpc response */ + unifyfs_fsync_out_t out; + out.ret = (int32_t) ret; + hg_return_t hret = margo_respond(req->handle, &out); + if (hret != HG_SUCCESS) { + LOGERR("margo_respond() failed"); + } - /* cleanup req */ - 
margo_destroy(req->handle); + /* cleanup req */ + margo_destroy(req->handle); + } return ret; } @@ -1554,12 +1435,28 @@ static int process_get_gfids(reqmgr_thrd_t* reqmgr, { int ret = UNIFYFS_SUCCESS; - int* gfid_list; - int num_gfids; + unifyfs_file_attr_t* remote_file_attrs; + int num_file_attrs; - ret = unifyfs_fops_get_gfids(&gfid_list, &num_gfids); - if (ret != UNIFYFS_SUCCESS) { - LOGERR("unifyfs_fops_get_gfids() failed"); + /* Submit a broadcast metaget_all request and wait for it to complete. */ + // TODO: This is actually horribly wasteful: we're fetching all the + // metadata for all files, but then just save the gfids and issue a + // separate request for each gfid. + // Now that we have all the metadata from remote servers, we + // need to completely re-work how the the unifyfs-ls util works. + + ret = unifyfs_invoke_broadcast_metaget_all(&remote_file_attrs, + &num_file_attrs); + if (UNIFYFS_SUCCESS != ret) { + LOGERR("unifyfs_invoke_broadcast_metaget_all() failed"); + return ret; + } + + // Package all the gfids up into one list + int* new_gfid_list = calloc(num_file_attrs, sizeof(int)); + // TODO: error checking! 
+ for (unsigned int i = 0; i < num_file_attrs; i++) { + new_gfid_list[i] = remote_file_attrs[i].gfid; } /* send rpc response */ @@ -1567,19 +1464,19 @@ static int process_get_gfids(reqmgr_thrd_t* reqmgr, unifyfs_get_gfids_out_t out; /* initialize bulk handle for the gfid_list */ - hg_size_t segment_sizes[1] = { num_gfids * sizeof(int) }; - void* segment_ptrs[1] = { (void*)gfid_list }; + hg_size_t segment_sizes[1] = { num_file_attrs * sizeof(int) }; + void* segment_ptrs[1] = { (void*)new_gfid_list }; hg_return_t hret = margo_bulk_create(unifyfsd_rpc_context->shm_mid, 1, segment_ptrs, segment_sizes, HG_BULK_READ_ONLY, &out.bulk_gfids); /* Note: unifyfsd_rpc_context defined in margo_server.h */ if (hret != HG_SUCCESS) { - free(gfid_list); + free(new_gfid_list); return UNIFYFS_ERROR_MARGO; } out.ret = (int32_t) ret; - out.num_gfids = num_gfids; + out.num_gfids = num_file_attrs; hret = margo_respond(req->handle, &out); if (hret != HG_SUCCESS) { LOGERR("margo_respond() failed"); @@ -1588,11 +1485,9 @@ static int process_get_gfids(reqmgr_thrd_t* reqmgr, /* cleanup req */ margo_destroy(req->handle); margo_bulk_free(out.bulk_gfids); - free(gfid_list); + free(new_gfid_list); return ret; - - } /* iterate over list of chunk reads and send responses */ @@ -1649,6 +1544,9 @@ static int rm_process_client_requests(reqmgr_thrd_t* reqmgr) rret = process_read_rpc(reqmgr, req); break; case UNIFYFS_CLIENT_RPC_SYNC: + /* we remove this req since it will be finished by the svcmgr and + * we don't want it deleted below as part of arraylist_free() */ + req = arraylist_remove(client_reqs, i); rret = process_fsync_rpc(reqmgr, req); break; case UNIFYFS_CLIENT_RPC_TRANSFER: @@ -1696,6 +1594,10 @@ static int rm_heartbeat(reqmgr_thrd_t* reqmgr) int ret = UNIFYFS_SUCCESS; + if (!reqmgr->attached) { + return ret; + } + /* send a heartbeat rpc to associated client every 30 seconds */ time_t now = time(NULL); if (0 == last_check) { diff --git a/server/src/unifyfs_request_manager.h 
b/server/src/unifyfs_request_manager.h index bfb1910e1..e3341b44c 100644 --- a/server/src/unifyfs_request_manager.h +++ b/server/src/unifyfs_request_manager.h @@ -33,7 +33,6 @@ #include "unifyfs_global.h" #include "unifyfs_inode.h" #include "unifyfs_fops.h" -#include "unifyfs_metadata_mdhim.h" typedef struct { client_callback_e req_type; @@ -42,14 +41,6 @@ typedef struct { int gfid; } client_callback_req; -typedef struct { - client_rpc_e req_type; - hg_handle_t handle; - void* input; - void* bulk_buf; - size_t bulk_sz; -} client_rpc_req_t; - typedef struct { readreq_status_e status; /* aggregate request status */ int in_use; /* currently using this req? */ @@ -97,6 +88,9 @@ typedef struct reqmgr_thrd { /* list of client callback requests */ arraylist_t* client_callbacks; + /* flag set when client has fully attached */ + int attached; + /* flag set to indicate request manager thread should exit */ int exit_flag; @@ -115,13 +109,6 @@ server_read_req_t* rm_reserve_read_req(reqmgr_thrd_t* thrd_ctrl); int rm_release_read_req(reqmgr_thrd_t* thrd_ctrl, server_read_req_t* rdreq); -/* issue remote chunk read requests for extent chunks - * listed within keyvals */ -int rm_create_chunk_requests(reqmgr_thrd_t* thrd_ctrl, - server_read_req_t* rdreq, - int num_vals, - unifyfs_keyval_t* keyvals); - /* create Request Manager thread for application client */ reqmgr_thrd_t* unifyfs_rm_thrd_create(int app_id, int client_id); diff --git a/server/src/unifyfs_server.c b/server/src/unifyfs_server.c index a9530bb17..f226f492b 100644 --- a/server/src/unifyfs_server.c +++ b/server/src/unifyfs_server.c @@ -37,7 +37,6 @@ // server components #include "unifyfs_global.h" -#include "unifyfs_metadata_mdhim.h" #include "unifyfs_request_manager.h" #include "unifyfs_service_manager.h" #include "unifyfs_inode_tree.h" @@ -63,6 +62,11 @@ static app_config* app_configs[UNIFYFS_SERVER_MAX_NUM_APPS]; /* list of apps */ static size_t clients_per_app = UNIFYFS_SERVER_MAX_APP_CLIENTS; +/* arraylist and mutex 
to track pending remote metaget() requests */ +arraylist_t* pending_metagets; // = NULL +static ABT_mutex pending_metagets_abt_sync; + + static int unifyfs_exit(void); #if defined(UNIFYFS_MULTIPLE_DELEGATORS) @@ -210,18 +214,7 @@ static int get_server_rank_and_size(const unifyfs_cfg_t* cfg) { int rc; -#if defined(UNIFYFSD_USE_MPI) - /* use rank and size of MPI communicator */ - rc = MPI_Comm_rank(MPI_COMM_WORLD, &glb_pmi_rank); - if (rc != MPI_SUCCESS) { - exit(1); - } - - rc = MPI_Comm_size(MPI_COMM_WORLD, &glb_pmi_size); - if (rc != MPI_SUCCESS) { - exit(1); - } -#elif !defined(USE_PMIX) && !defined(USE_PMI2) +#if !defined(USE_PMIX) && !defined(USE_PMI2) /* if not using PMIX or PMI2, * initialize rank/size to assume a singleton job */ glb_pmi_rank = 0; @@ -361,15 +354,8 @@ int main(int argc, char* argv[]) // print config unifyfs_config_print(&server_cfg, unifyfs_log_stream); - // initialize MPI and PMI if we're using them -#if defined(UNIFYFSD_USE_MPI) - int provided; - rc = MPI_Init_thread(NULL, NULL, MPI_THREAD_MULTIPLE, &provided); - if (rc != MPI_SUCCESS) { - LOGERR("failed to initialize MPI"); - exit(1); - } -#elif defined(USE_PMIX) + // initialize PMI if we're using it +#if defined(USE_PMIX) rc = unifyfs_pmix_init(); if (rc != (int)UNIFYFS_SUCCESS) { LOGERR("failed to initialize PMIX"); @@ -461,6 +447,12 @@ int main(int argc, char* argv[]) failed_clients = arraylist_create(0); ABT_mutex_unlock(app_configs_abt_sync); + ABT_mutex_create(&pending_metagets_abt_sync); + + ABT_mutex_lock(pending_metagets_abt_sync); + pending_metagets = arraylist_create(256); + ABT_mutex_unlock(pending_metagets_abt_sync); + /* launch the service manager (note: must happen after ABT_init) */ LOGDBG("launching service manager thread"); rc = svcmgr_init(); @@ -486,10 +478,10 @@ int main(int argc, char* argv[]) /* initialize our tree that maps a gfid to its extent tree */ unifyfs_inode_tree_init(global_inode_tree); - LOGDBG("publishing server pid"); - rc = 
unifyfs_publish_server_pids(); + LOGDBG("waiting for server bootstrapping to complete"); + rc = unifyfs_complete_bootstrap(); if (rc != 0) { - LOGERR("failed to publish server pid file: %s", + LOGERR("failed to complete server bootstrapping: %s", unifyfs_rc_enum_description(rc)); exit(1); } @@ -517,166 +509,6 @@ int main(int argc, char* argv[]) return unifyfs_exit(); } -#if defined(UNIFYFSD_USE_MPI) -#if defined(UNIFYFS_MULTIPLE_DELEGATORS) -/* count the number of delegators per node, and - * the rank of each delegator, the results are stored - * in local_rank_cnt and local_rank_lst. - * @param numTasks: number of processes in the communicator - * @return success/error code */ -static int CountTasksPerNode(int rank, int numTasks) -{ - char localhost[UNIFYFS_MAX_HOSTNAME]; - char hostname[UNIFYFS_MAX_HOSTNAME]; - int resultsLen = UNIFYFS_MAX_HOSTNAME; - - MPI_Status status; - int i, j, rc; - - if (numTasks < 0) { - return -1; - } - - rc = MPI_Get_processor_name(localhost, &resultsLen); - if (rc != 0) { - return -1; - } - - if (rank == 0) { - /* a container of (rank, host) mappings */ - name_rank_pair_t* host_set = - (name_rank_pair_t*)calloc(numTasks, sizeof(name_rank_pair_t)); - /* MPI_Recv all hostnames, and compare to local hostname */ - for (i = 1; i < numTasks; i++) { - rc = MPI_Recv(hostname, UNIFYFS_MAX_HOSTNAME, - MPI_CHAR, MPI_ANY_SOURCE, - MPI_ANY_TAG, - MPI_COMM_WORLD, &status); - if (rc != 0) { - return -1; - } - strcpy(host_set[i].hostname, hostname); - host_set[i].rank = status.MPI_SOURCE; - } - strcpy(host_set[0].hostname, localhost); - host_set[0].rank = 0; - - /* sort by hostname */ - qsort(host_set, numTasks, sizeof(name_rank_pair_t), - compare_name_rank_pair); - - /* rank_cnt: records the number of processes on each host - * rank_set: the list of ranks for each host */ - int** rank_set = (int**)calloc(numTasks, sizeof(int*)); - int* rank_cnt = (int*)calloc(numTasks, sizeof(int)); - - int cursor = 0; - int set_counter = 0; - for (i = 1; i < 
numTasks; i++) { - if (strcmp(host_set[i].hostname, - host_set[i - 1].hostname) != 0) { - // found a different host, so switch to a new set - int hiter, riter = 0; - rank_set[set_counter] = - (int*)calloc((i - cursor), sizeof(int)); - rank_cnt[set_counter] = i - cursor; - for (hiter = cursor; hiter < i; hiter++, riter++) { - rank_set[set_counter][riter] = host_set[hiter].rank; - } - - set_counter++; - cursor = i; - } - } - - /* fill rank_cnt and rank_set entry for the last host */ - - rank_set[set_counter] = - (int*)calloc((i - cursor), sizeof(int)); - rank_cnt[set_counter] = numTasks - cursor; - j = 0; - for (i = cursor; i < numTasks; i++, j++) { - rank_set[set_counter][j] = host_set[i].rank; - } - set_counter++; - - /* broadcast rank_set information */ - int root_set_no = -1; - for (i = 0; i < set_counter; i++) { - /* send rank set to each of its ranks */ - for (j = 0; j < rank_cnt[i]; j++) { - if (rank_set[i][j] != 0) { - rc = MPI_Send(&rank_cnt[i], 1, MPI_INT, - rank_set[i][j], 0, MPI_COMM_WORLD); - if (rc != 0) { - return -1; - } - rc = MPI_Send(rank_set[i], rank_cnt[i], MPI_INT, - rank_set[i][j], 0, MPI_COMM_WORLD); - if (rc != 0) { - return -1; - } - } else { - root_set_no = i; - local_rank_cnt = rank_cnt[i]; - local_rank_lst = (int*)calloc(rank_cnt[i], sizeof(int)); - memcpy(local_rank_lst, rank_set[i], - (local_rank_cnt * sizeof(int))) - } - } - } - - for (i = 0; i < set_counter; i++) { - free(rank_set[i]); - } - free(rank_cnt); - free(host_set); - free(rank_set); - } else { /* non-root rank */ - /* MPI_Send hostname to root */ - rc = MPI_Send(localhost, UNIFYFS_MAX_HOSTNAME, MPI_CHAR, - 0, 0, MPI_COMM_WORLD); - if (rc != 0) { - return -1; - } - /* receive the local rank set count */ - rc = MPI_Recv(&local_rank_cnt, 1, MPI_INT, 0, - 0, MPI_COMM_WORLD, &status); - if (rc != 0) { - return -1; - } - /* receive the the local rank set */ - local_rank_lst = (int*)calloc(local_rank_cnt, sizeof(int)); - rc = MPI_Recv(local_rank_lst, local_rank_cnt, MPI_INT, 0, - 
0, MPI_COMM_WORLD, &status); - if (rc != 0) { - free(local_rank_lst); - return -1; - } - } - - /* sort by rank */ - qsort(local_rank_lst, local_rank_cnt, sizeof(int), compare_int); - - return 0; -} - -static int find_rank_idx(int my_rank) -{ - int i; - assert(local_rank_lst != NULL); - for (i = 0; i < local_rank_cnt; i++) { - if (local_rank_lst[i] == my_rank) { - return i; - } - } - return -1; -} - -#endif // UNIFYFS_MULTIPLE_DELEGATORS -#endif // UNIFYFSD_USE_MPI - - static int unifyfs_exit(void) { int ret = UNIFYFS_SUCCESS; @@ -705,6 +537,13 @@ static int unifyfs_exit(void) } ABT_mutex_unlock(app_configs_abt_sync); + ABT_mutex_lock(pending_metagets_abt_sync); + if (NULL != pending_metagets) { + arraylist_free(pending_metagets); + pending_metagets = NULL; + } + ABT_mutex_unlock(pending_metagets_abt_sync); + /* TODO: notify the service threads to exit */ /* finalize kvstore service*/ @@ -716,22 +555,16 @@ static int unifyfs_exit(void) LOGERR("Error returned from ABT_mutex_free(): %d", ret); } + ret = ABT_mutex_free(&pending_metagets_abt_sync); + if (ret != ABT_SUCCESS) { + LOGERR("Error returned from ABT_mutex_free(): %d", ret); + } + /* shutdown rpc service * (note: this needs to happen after app-client cleanup above) */ LOGDBG("stopping rpc service"); margo_server_rpc_finalize(); -#if defined(USE_MDHIM) - /* shutdown the metadata service*/ - LOGDBG("stopping metadata service"); - meta_sanitize(); -#endif - -#if defined(UNIFYFSD_USE_MPI) - LOGDBG("finalizing MPI"); - MPI_Finalize(); -#endif - /* Finalize the config variables */ LOGDBG("Finalizing config variables"); ret = unifyfs_config_fini(&server_cfg); @@ -1110,15 +943,105 @@ unifyfs_rc add_failed_client(int app_id, int client_id) if (NULL == client) { return EINVAL; } + unifyfs_rc ret = UNIFYFS_SUCCESS; ABT_mutex_lock(app_configs_abt_sync); if (NULL != failed_clients) { int rc = arraylist_add(failed_clients, client); if (rc == -1) { - LOGERR("failed to add client to failed_clients arraylist"); + LOGERR("failed 
to add client[%d:%d] to failed_clients arraylist", + app_id, client_id); ret = UNIFYFS_FAILURE; } + } else { + LOGERR("failed_clients is NULL!"); + ret = UNIFYFS_FAILURE; } ABT_mutex_unlock(app_configs_abt_sync); return ret; } + +unifyfs_rc add_pending_metaget(int gfid) +{ + int* pending_gfid = (int*) malloc(sizeof(gfid)); + if (NULL == pending_gfid) { + return ENOMEM; + } + *pending_gfid = gfid; + + unifyfs_rc ret = UNIFYFS_SUCCESS; + ABT_mutex_lock(pending_metagets_abt_sync); + if (NULL != pending_metagets) { + int num_pending = arraylist_size(pending_metagets); + if (num_pending > 0) { + for (int i = 0; i < num_pending; i++) { + int* pending = (int*) arraylist_get(pending_metagets, i); + if ((NULL != pending) && (*pending == gfid)) { + ret = EEXIST; + break; + } + } + } + if (ret == UNIFYFS_SUCCESS) { + int rc = arraylist_add(pending_metagets, pending_gfid); + if (rc == -1) { + LOGERR("failed to add gfid=%d to pending_metagets arraylist", + gfid); + ret = UNIFYFS_FAILURE; + } + + } + } else { + LOGERR("pending_metagets is NULL!"); + ret = UNIFYFS_FAILURE; + } + ABT_mutex_unlock(pending_metagets_abt_sync); + return ret; +} + +bool check_pending_metaget(int gfid) +{ + bool is_pending = false; + ABT_mutex_lock(pending_metagets_abt_sync); + if (NULL != pending_metagets) { + int num_pending = arraylist_size(pending_metagets); + if (num_pending > 0) { + for (int i = 0; i < num_pending; i++) { + int* pending = (int*) arraylist_get(pending_metagets, i); + if ((NULL != pending) && (*pending == gfid)) { + is_pending = true; + break; + } + } + } + } else { + LOGERR("pending_metagets is NULL!"); + } + ABT_mutex_unlock(pending_metagets_abt_sync); + return is_pending; +} + +unifyfs_rc clear_pending_metaget(int gfid) +{ + unifyfs_rc ret = UNIFYFS_FAILURE; + ABT_mutex_lock(pending_metagets_abt_sync); + if (NULL != pending_metagets) { + int num_pending = arraylist_size(pending_metagets); + if (num_pending > 0) { + for (int i = 0; i < num_pending; i++) { + int* pending = (int*) 
arraylist_get(pending_metagets, i); + if ((NULL != pending) && (*pending == gfid)) { + pending = (int*) arraylist_remove(pending_metagets, i); + free(pending); + ret = UNIFYFS_SUCCESS; + break; + } + } + } + } else { + LOGERR("pending_metagets is NULL!"); + ret = UNIFYFS_FAILURE; + } + ABT_mutex_unlock(pending_metagets_abt_sync); + return ret; +} diff --git a/server/src/unifyfs_server_pid.c b/server/src/unifyfs_server_pid.c index 99c1bcf96..c4f2870ff 100644 --- a/server/src/unifyfs_server_pid.c +++ b/server/src/unifyfs_server_pid.c @@ -26,30 +26,17 @@ extern unifyfs_cfg_t server_cfg; -static int n_servers_reported; // = 0 +static volatile int n_servers_reported; // = 0 +static volatile int bootstrap_completed; // = 0 static int* server_pids; // = NULL -static pthread_cond_t server_pid_cond = PTHREAD_COND_INITIALIZER; -static pthread_mutex_t server_pid_mutex = PTHREAD_MUTEX_INITIALIZER; -static struct timespec server_pid_timeout; +static ABT_cond server_bootstrap_cond = ABT_COND_NULL; +static ABT_mutex server_bootstrap_mutex = ABT_MUTEX_NULL; +static struct timespec server_bootstrap_timeout; -static int alloc_server_pids(void) -{ - int ret = 0; - pthread_mutex_lock(&server_pid_mutex); - if (NULL == server_pids) { - server_pids = (int*) calloc(glb_pmi_size, sizeof(int)); - if (NULL == server_pids) { - LOGERR("failed to allocate memory (%s)", strerror(errno)); - ret = ENOMEM; - } - } - pthread_mutex_unlock(&server_pid_mutex); - return ret; -} -static inline int set_pidfile_timeout(void) +static inline int set_bootstrap_timeout(void) { - int ret = 0; + int ret = UNIFYFS_SUCCESS; long timeout_sec = 0; if (server_cfg.server_init_timeout) { @@ -61,16 +48,82 @@ static inline int set_pidfile_timeout(void) } } - clock_gettime(CLOCK_REALTIME, &server_pid_timeout); - server_pid_timeout.tv_sec += timeout_sec; + clock_gettime(CLOCK_REALTIME, &server_bootstrap_timeout); + server_bootstrap_timeout.tv_sec += timeout_sec; + + return ret; +} + +static void free_bootstrap_state(void) 
+{ + if (ABT_MUTEX_NULL != server_bootstrap_mutex) { + ABT_mutex_lock(server_bootstrap_mutex); + if (ABT_COND_NULL != server_bootstrap_cond) { + ABT_cond_free(&server_bootstrap_cond); + server_bootstrap_cond = ABT_COND_NULL; + } + ABT_mutex_unlock(server_bootstrap_mutex); + ABT_mutex_free(&server_bootstrap_mutex); + server_bootstrap_mutex = ABT_MUTEX_NULL; + } + + if (NULL != server_pids) { + free(server_pids); + server_pids = NULL; + } +} + +static int initialize_bootstrap_state(void) +{ + int rc; + int ret = UNIFYFS_SUCCESS; + + if (ABT_MUTEX_NULL == server_bootstrap_mutex) { + rc = ABT_mutex_create(&server_bootstrap_mutex); + if (ABT_SUCCESS != rc) { + LOGERR("ABT_mutex_create failed"); + return UNIFYFS_ERROR_MARGO; + } + } + + ABT_mutex_lock(server_bootstrap_mutex); + if (ABT_COND_NULL == server_bootstrap_cond) { + rc = ABT_cond_create(&server_bootstrap_cond); + if (ABT_SUCCESS != rc) { + LOGERR("ABT_cond_create failed"); + ret = UNIFYFS_ERROR_MARGO; + } + } + + if (UNIFYFS_SUCCESS == ret) { + ret = set_bootstrap_timeout(); + } + + if ((UNIFYFS_SUCCESS == ret) && (0 == glb_pmi_rank)) { + if (NULL == server_pids) { + server_pids = (int*) calloc(glb_pmi_size, sizeof(int)); + if (NULL == server_pids) { + LOGERR("failed to allocate memory for pid array (%s)", + strerror(errno)); + ret = ENOMEM; + } + } + } + + ABT_mutex_unlock(server_bootstrap_mutex); + + if (ret != UNIFYFS_SUCCESS) { + LOGERR("failed to initialize bootstrap state!"); + free_bootstrap_state(); + } - return 0; + return ret; } static int create_server_pid_file(void) { int i = 0; - int ret = 0; + int ret = UNIFYFS_SUCCESS; char filename[UNIFYFS_MAX_FILENAME] = { 0, }; FILE* fp = NULL; @@ -99,80 +152,117 @@ static int create_server_pid_file(void) int unifyfs_report_server_pid(int rank, int pid) { - assert(rank < glb_pmi_size); + assert((glb_pmi_rank == 0) && (rank < glb_pmi_size)); - int ret = alloc_server_pids(); - if (ret) { - LOGERR("failed to allocate pid array"); - return ret; + /* NOTE: need this 
here in case we receive a pid report rpc before we + * have initialized state in unifyfs_complete_bootstrap() */ + int rc = initialize_bootstrap_state(); + if (rc) { + LOGERR("failed to initialize bootstrap state"); + return rc; } - pthread_mutex_lock(&server_pid_mutex); + ABT_mutex_lock(server_bootstrap_mutex); n_servers_reported++; server_pids[rank] = pid; - pthread_cond_signal(&server_pid_cond); - pthread_mutex_unlock(&server_pid_mutex); + ABT_cond_signal(server_bootstrap_cond); + ABT_mutex_unlock(server_bootstrap_mutex); + + return UNIFYFS_SUCCESS; +} + +int unifyfs_signal_bootstrap_complete(void) +{ + assert(glb_pmi_rank != 0); + + ABT_mutex_lock(server_bootstrap_mutex); + bootstrap_completed = 1; + ABT_cond_signal(server_bootstrap_cond); + ABT_mutex_unlock(server_bootstrap_mutex); return UNIFYFS_SUCCESS; } -int unifyfs_publish_server_pids(void) +static int wait_for_bootstrap_condition(void) { int ret = UNIFYFS_SUCCESS; + int rc = ABT_cond_timedwait(server_bootstrap_cond, server_bootstrap_mutex, + &server_bootstrap_timeout); + if (ABT_ERR_COND_TIMEDOUT == rc) { + LOGERR("server initialization timeout"); + ret = UNIFYFS_ERROR_TIMEOUT; + } else if (rc) { + LOGERR("failed to wait on condition (err=%d)", rc); + ret = UNIFYFS_ERROR_MARGO; + } + return ret; +} + +int unifyfs_complete_bootstrap(void) +{ + int ret = UNIFYFS_SUCCESS; + + int rc = initialize_bootstrap_state(); + if (UNIFYFS_SUCCESS != rc) { + LOGERR("ABT_mutex_create failed"); + return UNIFYFS_ERROR_MARGO; + } if (glb_pmi_rank > 0) { - /* publish my pid to server 0 */ + /* publish my pid to server rank 0 */ + LOGDBG("server[%d] - reporting pid", glb_pmi_rank); ret = unifyfs_invoke_server_pid_rpc(); if (ret) { LOGERR("failed to invoke pid rpc (%s)", strerror(ret)); + } else { + /* wait for "bootstrap-complete" broadcast from server rank 0 */ + ABT_mutex_lock(server_bootstrap_mutex); + while (!bootstrap_completed) { + ret = wait_for_bootstrap_condition(); + if (UNIFYFS_ERROR_TIMEOUT == ret) { + break; + } 
+ } + ABT_mutex_unlock(server_bootstrap_mutex); + if (bootstrap_completed) { + LOGDBG("server[%d] - bootstrap completed", glb_pmi_rank); + } } - } else { /* rank 0 acts as coordinator */ - ret = alloc_server_pids(); - if (ret) { - return ret; - } - - ret = set_pidfile_timeout(); - if (ret) { - return ret; - } - - pthread_mutex_lock(&server_pid_mutex); - server_pids[0] = server_pid; - n_servers_reported++; + } else { /* server rank 0 acts as coordinator */ /* keep checking count of reported servers until all have reported * or we hit the timeout */ + ABT_mutex_lock(server_bootstrap_mutex); + server_pids[0] = server_pid; + n_servers_reported++; while (n_servers_reported < glb_pmi_size) { - ret = pthread_cond_timedwait(&server_pid_cond, - &server_pid_mutex, - &server_pid_timeout); - if (ETIMEDOUT == ret) { - LOGERR("server initialization timeout"); - break; - } else if (ret) { - LOGERR("failed to wait on condition (err=%d, %s)", - errno, strerror(errno)); + ret = wait_for_bootstrap_condition(); + if (UNIFYFS_ERROR_TIMEOUT == ret) { break; } } + ABT_mutex_unlock(server_bootstrap_mutex); if (n_servers_reported == glb_pmi_size) { - ret = create_server_pid_file(); + bootstrap_completed = 1; + LOGDBG("server[%d] - bootstrap completed", glb_pmi_rank); + ret = unifyfs_invoke_broadcast_bootstrap_complete(); if (UNIFYFS_SUCCESS == ret) { LOGDBG("servers ready to accept client connections"); + ret = create_server_pid_file(); + if (UNIFYFS_SUCCESS != ret) { + LOGERR("pid file creation failed!"); + } + } else { + LOGERR("bootstrap broadcast failed!"); } } else { LOGERR("%d of %d servers reported their pids", n_servers_reported, glb_pmi_size); } - - free(server_pids); - server_pids = NULL; - - pthread_mutex_unlock(&server_pid_mutex); } + free_bootstrap_state(); + return ret; } - diff --git a/server/src/unifyfs_service_manager.c b/server/src/unifyfs_service_manager.c index 001a163eb..036680b05 100644 --- a/server/src/unifyfs_service_manager.c +++ 
b/server/src/unifyfs_service_manager.c @@ -42,9 +42,9 @@ typedef struct { pthread_t thrd; pid_t tid; - /* pthread mutex and condition variable for work notification */ - pthread_mutex_t thrd_lock; - pthread_cond_t thrd_cond; + /* mutex and condition variable for work notification */ + ABT_mutex thrd_lock; + ABT_cond thrd_cond; /* thread status */ int initialized; @@ -54,7 +54,7 @@ typedef struct { /* thread return status code */ int sm_exit_rc; - /* argobots mutex for synchronizing access to request state between + /* mutex for synchronizing access to request state between * margo rpc handler ULTs and SM thread */ ABT_mutex reqs_sync; @@ -75,7 +75,7 @@ svcmgr_state_t* sm; // = NULL do { \ if ((NULL != sm) && sm->initialized) { \ /*LOGDBG("locking SM state");*/ \ - pthread_mutex_lock(&(sm->thrd_lock)); \ + ABT_mutex_lock(sm->thrd_lock); \ } \ } while (0) @@ -83,7 +83,7 @@ do { \ do { \ if ((NULL != sm) && sm->initialized) { \ /*LOGDBG("unlocking SM state");*/ \ - pthread_mutex_unlock(&(sm->thrd_lock)); \ + ABT_mutex_unlock(sm->thrd_lock); \ } \ } while (0) @@ -110,7 +110,7 @@ static inline void signal_svcmgr(void) if (this_thread != sm->tid) { /* signal svcmgr to begin processing the requests we just added */ LOGDBG("signaling new service requests"); - pthread_cond_signal(&(sm->thrd_cond)); + ABT_cond_signal(sm->thrd_cond); } } @@ -173,32 +173,21 @@ int svcmgr_init(void) return ENOMEM; } - /* initialize lock for shared data structures of the + /* create mutex locks for thread and request data structures of the * service manager */ - pthread_mutexattr_t attr; - pthread_mutexattr_init(&attr); - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - int rc = pthread_mutex_init(&(sm->thrd_lock), &attr); - if (rc != 0) { - LOGERR("pthread_mutex_init failed for service manager rc=%d (%s)", - rc, strerror(rc)); - svcmgr_fini(); - return rc; - } + ABT_mutex_create(&(sm->thrd_lock)); + ABT_mutex_create(&(sm->reqs_sync)); /* initialize condition variable to synchronize 
work * notifications for the request manager thread */ - rc = pthread_cond_init(&(sm->thrd_cond), NULL); - if (rc != 0) { - LOGERR("pthread_cond_init failed for service manager rc=%d (%s)", - rc, strerror(rc)); - pthread_mutex_destroy(&(sm->thrd_lock)); + int rc = ABT_cond_create(&(sm->thrd_cond)); + if (rc != ABT_SUCCESS) { + LOGERR("ABT_cond_create() failed for service manager rc=%d", rc); + ABT_mutex_free(&(sm->thrd_lock)); svcmgr_fini(); - return rc; + return UNIFYFS_ERROR_MARGO; } - ABT_mutex_create(&(sm->reqs_sync)); - /* allocate a list to track chunk reads */ sm->chunk_reads = arraylist_create(0); if (sm->chunk_reads == NULL) { @@ -249,10 +238,10 @@ int svcmgr_fini(void) if (sm->initialized) { /* join thread before cleaning up state */ if (sm->tid != -1) { - pthread_mutex_lock(&(sm->thrd_lock)); + ABT_mutex_lock(sm->thrd_lock); sm->time_to_exit = 1; - pthread_cond_signal(&(sm->thrd_cond)); - pthread_mutex_unlock(&(sm->thrd_lock)); + ABT_cond_signal(sm->thrd_cond); + ABT_mutex_unlock(sm->thrd_lock); pthread_join(sm->thrd, NULL); } } @@ -273,15 +262,10 @@ int svcmgr_fini(void) arraylist_free(sm->svc_reqs); } - int abt_err = ABT_mutex_free(&(sm->reqs_sync)); - if (ABT_SUCCESS != abt_err) { - /* All we can really do here is log the error */ - LOGERR("Error code returned from ABT_mutex_free(): %d", abt_err); - } - if (sm->initialized) { - pthread_mutex_destroy(&(sm->thrd_lock)); - pthread_cond_destroy(&(sm->thrd_cond)); + ABT_mutex_free(&(sm->reqs_sync)); + ABT_mutex_free(&(sm->thrd_lock)); + ABT_cond_free(&(sm->thrd_cond)); } /* free the service manager struct allocated during init */ @@ -498,6 +482,12 @@ int sm_set_fileattr(int gfid, LOGERR("failed to set attributes for gfid=%d (rc=%d, is_owner=%d)", gfid, ret, is_owner); } + } else if (is_owner && (file_op == UNIFYFS_FILE_ATTR_OP_CREATE)) { + /* start a broadcast rpc to inform other servers of new file */ + int rc = unifyfs_invoke_broadcast_fileattr(gfid, file_op, attrs); + if (rc != UNIFYFS_SUCCESS) { + 
LOGERR("failed to broadcast new file (gfid=%d) creation", gfid); + } } return ret; } @@ -1032,38 +1022,14 @@ static int process_metaset_rpc(server_rpc_req_t* req) int gfid = (int) in->gfid; int attr_op = (int) in->fileop; unifyfs_file_attr_t* attrs = &(in->attr); - int ret = sm_set_fileattr(gfid, attr_op, attrs); - margo_free_input(req->handle, in); - free(in); - - /* send rpc response */ - metaset_out_t out; - out.ret = (int32_t) ret; - hg_return_t hret = margo_respond(req->handle, &out); - if (hret != HG_SUCCESS) { - LOGERR("margo_respond() failed"); - } - - /* cleanup req */ - margo_destroy(req->handle); - return ret; -} + int ret = sm_set_fileattr(gfid, attr_op, attrs); -static int process_server_pid_rpc(server_rpc_req_t* req) -{ - /* get input parameters */ - server_pid_in_t* in = req->input; - int src_rank = (int) in->rank; - int pid = (int) in->pid; margo_free_input(req->handle, in); free(in); - /* do pid report */ - int ret = unifyfs_report_server_pid(src_rank, pid); - /* send rpc response */ - server_pid_out_t out; + metaset_out_t out; out.ret = (int32_t) ret; hg_return_t hret = margo_respond(req->handle, &out); if (hret != HG_SUCCESS) { @@ -1135,6 +1101,21 @@ static int process_truncate_rpc(server_rpc_req_t* req) return ret; } +static int process_bootstrap_bcast_rpc(server_rpc_req_t* req) +{ + /* signal bootstrap completion */ + int ret = unifyfs_signal_bootstrap_complete(); + if (ret != UNIFYFS_SUCCESS) { + LOGERR("unifyfs_signal_bootstrap_complete() failed - rc=%d", ret); + } + collective_set_local_retval(req->coll, ret); + + /* create a ULT to finish broadcast operation */ + ret = invoke_bcast_progress_rpc(req->coll); + + return ret; +} + static int process_extents_bcast_rpc(server_rpc_req_t* req) { /* get target file and extents */ @@ -1171,8 +1152,13 @@ static int process_fileattr_bcast_rpc(server_rpc_req_t* req) /* update file attributes */ int ret = sm_set_fileattr(gfid, attr_op, attrs); if (ret != UNIFYFS_SUCCESS) { - LOGERR("set_fileattr(gfid=%d, 
op=%d) failed - rc=%d", - gfid, attr_op, ret); + if ((attr_op == UNIFYFS_FILE_ATTR_OP_CREATE) && (ret == EEXIST)) { + /* ignore duplicate creates */ + ret = UNIFYFS_SUCCESS; + } else { + LOGWARN("set_fileattr(gfid=%d, op=%d) failed - rc=%d (%s)", + gfid, attr_op, ret, unifyfs_rc_enum_description(ret)); + } } collective_set_local_retval(req->coll, ret); @@ -1320,6 +1306,215 @@ static int process_unlink_bcast_rpc(server_rpc_req_t* req) return ret; } +static int process_metaget_bcast_rpc(server_rpc_req_t* req) +{ + /* Iterate through the global_inode_tree and copy all the file + * attr structs for the files this server owns */ + + /* The file names in the unifyfs_file_attr_t are pointers to separately + * allocated memory, and thus have to be handled specially. We'll copy + * the filenames into a separate char[] that will be sent as an hg_string_t + * separate from the bulk transfer of the unifyfs_file_attr_t structs. We + * use this variable to keep track of how big that buffer needs to be. + */ + uintptr_t total_name_len = 0; + /* Note: Using uintptr_t because this value gets cast to a char* and must + * therefore be the same size as a pointer. */ + char* concatenated_names = NULL; + size_t concatenated_names_size = 0; + + unsigned int num_files = 0; + unifyfs_file_attr_t* attr_list; + + int ret = unifyfs_get_owned_files(&num_files, &attr_list); + if (UNIFYFS_SUCCESS != ret) { + return ret; + } + + /* Loop through the attr list and calculate the total length + * of all the filenames. We'll need this down below...*/ + for (unsigned int i = 0; i < num_files; i++) { + concatenated_names_size += strlen(attr_list[i].filename); + } + + concatenated_names = calloc(concatenated_names_size+1, sizeof(char)); + // +1 to allow space for the null terminator + if (!concatenated_names) { + free(attr_list); + return ENOMEM; + } + + /* unifyfs_file_attr_t.filename is a pointer.
Since sending pointers over + * the network is useless, we're going to abuse this value by using it to + * store the offset into the separate char array that will hold the + * filenames. + * We only need the offset of the START of the filename. The end of the + * filename is assumed to be 1 character before the start of the next + * filename or - if this is the last file - the end of the string. */ + for (unsigned int i = 0; i < num_files; i++) { + size_t filename_len = strlen(attr_list[i].filename); + strcat(concatenated_names, attr_list[i].filename); + free(attr_list[i].filename); + attr_list[i].filename = (char*)total_name_len; + total_name_len += filename_len; + } + + coll_request* coll = (coll_request*)req->coll; + // Do a couple of sanity checks + if (UNIFYFS_SERVER_BCAST_RPC_METAGET != coll->req_type) { + LOGERR("invalid collective request type %d", + coll->req_type); + free(attr_list); + free(concatenated_names); + return UNIFYFS_ERROR_MARGO; + } + if (sizeof(metaget_all_bcast_out_t) != coll->output_sz) { + LOGERR("Unexpected size for collective output struct. 
" + "Expected %d but value was %d", + sizeof(metaget_all_bcast_out_t), + coll->output_sz); + free(attr_list); + free(concatenated_names); + return UNIFYFS_ERROR_MARGO; + } + + // If there are any files, then setup the bulk transfer + if (num_files) { + hg_size_t buf_size = num_files * sizeof(unifyfs_file_attr_t); + hg_bulk_t file_attrs_bulk; + hg_return_t hret = + margo_bulk_create(unifyfsd_rpc_context->svr_mid, 1, + (void**)&attr_list, &buf_size, + HG_BULK_READ_ONLY, &file_attrs_bulk); + if (hret != HG_SUCCESS) { + LOGERR("margo_bulk_create() failed - %s", HG_Error_to_string(hret)); + free(attr_list); + free(concatenated_names); + collective_set_local_retval(req->coll, UNIFYFS_ERROR_MARGO); + return UNIFYFS_ERROR_MARGO; + } + + /* set the output params */ + metaget_all_bcast_out_t* mabo = (metaget_all_bcast_out_t*)coll->output; + mabo->file_meta = file_attrs_bulk; + mabo->num_files = num_files; + mabo->filenames = concatenated_names; + } else { + /* There's no files to transfer - set output params appropriately */ + metaget_all_bcast_out_t* mabo = (metaget_all_bcast_out_t*)coll->output; + mabo->file_meta = HG_BULK_NULL; + mabo->num_files = 0; + mabo->filenames = NULL; + + /* Also need to free attr_list and concatenated_names since they're + * not actually being used */ + free(attr_list); + free(concatenated_names); + } + + collective_set_local_retval(req->coll, UNIFYFS_SUCCESS); + + /* create a ULT to finish broadcast operation */ + return invoke_bcast_progress_rpc(req->coll); +} + +static int process_pending_sync(server_rpc_req_t* req) +{ + int ret = UNIFYFS_SUCCESS; + + /* get target file */ + int* pending_gfid = req->input; + int gfid = *pending_gfid; + free(pending_gfid); + + int owner_rank = hash_gfid_to_server(gfid); + int is_owner = (owner_rank == glb_pmi_rank); + + bool has_pending = unifyfs_inode_has_pending_extents(gfid); + if (has_pending) { + usleep(50000); /* sleep 50 ms to catch more pending extents */ + } + + arraylist_t* pending_list = NULL; + int 
rc = unifyfs_inode_get_pending_extents(gfid, &pending_list); + if (NULL != pending_list) { + LOGDBG("processing pending sync for gfid=%d", gfid); + + /* iterate through pending list to count total number of extents + * we will add locally (and possibly send to owner) */ + unsigned int total_extents = 0; + int n_items = arraylist_size(pending_list); + for (int i = 0; i < n_items; i++) { + void* item = arraylist_get(pending_list, i); + if (NULL != item) { + pending_extents_item* pei = (pending_extents_item*) item; + total_extents += pei->num_extents; + } + } + + /* allocate array for all the extents and then copy the sub-arrays + * from the pending list */ + extent_metadata* combined_extents = calloc((size_t)total_extents, + sizeof(extent_metadata)); + if (NULL == combined_extents) { + LOGERR("failed to allocate for combined extents"); + ret = ENOMEM; + } else { + unsigned int n_copied = 0; + for (int i = 0; i < n_items; i++) { + void* item = arraylist_get(pending_list, i); + if (NULL != item) { + pending_extents_item* pei = (pending_extents_item*) item; + memcpy(combined_extents + n_copied, pei->extents, + pei->num_extents * sizeof(extent_metadata)); + n_copied += pei->num_extents; + free(pei->extents); + } + } + + /* add the combined list to local inode */ + ret = unifyfs_inode_add_extents(gfid, total_extents, + combined_extents); + + if ((ret == UNIFYFS_SUCCESS) && !is_owner) { + /* send the combined list to the owner */ + ret = unifyfs_invoke_add_extents_rpc(gfid, total_extents, + combined_extents); + } + } + + /* iterate through pending list to send responses to client reqs */ + for (int i = 0; i < n_items; i++) { + void* item = arraylist_get(pending_list, i); + if (NULL != item) { + pending_extents_item* pei = (pending_extents_item*) item; + client_rpc_req_t* creq = pei->client_req; + + /* send rpc response to requesting client */ + unifyfs_fsync_out_t out; + out.ret = (int32_t) ret; + hg_return_t hret = margo_respond(creq->handle, &out); + if (hret != 
HG_SUCCESS) { + LOGERR("margo_respond() failed"); + } + + /* cleanup req */ + margo_destroy(creq->handle); + free(creq); + } + } + + /* this frees the list and each of the items */ + arraylist_free(pending_list); + } else if (rc != UNIFYFS_SUCCESS) { + ret = rc; + LOGERR("failed to get pending extents list for gfid=%d- rc=%d", + gfid, ret); + } + + return ret; +} + static int process_service_requests(void) { /* assume we'll succeed */ @@ -1374,15 +1569,15 @@ static int process_service_requests(void) case UNIFYFS_SERVER_RPC_METASET: rret = process_metaset_rpc(req); break; - case UNIFYFS_SERVER_RPC_PID_REPORT: - rret = process_server_pid_rpc(req); - break; case UNIFYFS_SERVER_RPC_TRANSFER: rret = process_transfer_rpc(req); break; case UNIFYFS_SERVER_RPC_TRUNCATE: rret = process_truncate_rpc(req); break; + case UNIFYFS_SERVER_BCAST_RPC_BOOTSTRAP: + rret = process_bootstrap_bcast_rpc(req); + break; case UNIFYFS_SERVER_BCAST_RPC_EXTENTS: rret = process_extents_bcast_rpc(req); break; @@ -1392,6 +1587,9 @@ static int process_service_requests(void) case UNIFYFS_SERVER_BCAST_RPC_LAMINATE: rret = process_laminate_bcast_rpc(req); break; + case UNIFYFS_SERVER_BCAST_RPC_METAGET: + rret = process_metaget_bcast_rpc(req); + break; case UNIFYFS_SERVER_BCAST_RPC_TRANSFER: rret = process_transfer_bcast_rpc(req); break; @@ -1401,6 +1599,9 @@ static int process_service_requests(void) case UNIFYFS_SERVER_BCAST_RPC_UNLINK: rret = process_unlink_bcast_rpc(req); break; + case UNIFYFS_SERVER_PENDING_SYNC: + rret = process_pending_sync(req); + break; default: LOGERR("unsupported server rpc request type %d", req->req_type); rret = UNIFYFS_ERROR_NYI; @@ -1499,12 +1700,11 @@ void* service_manager_thread(void* arg) timeout.tv_nsec -= 1000000000; timeout.tv_sec++; } - int wait_rc = pthread_cond_timedwait(&(sm->thrd_cond), - &(sm->thrd_lock), - &timeout); - if (0 == wait_rc) { + int wait_rc = ABT_cond_timedwait(sm->thrd_cond, sm->thrd_lock, + &timeout); + if (ABT_SUCCESS == wait_rc) { LOGDBG("SM 
got work"); - } else if (ETIMEDOUT != wait_rc) { + } else if (ABT_ERR_COND_TIMEDOUT != wait_rc) { LOGERR("SM work condition wait failed (rc=%d)", wait_rc); } diff --git a/t/Makefile.am b/t/Makefile.am index afa3d2c5c..902b682b5 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -134,7 +134,8 @@ test_sysio_sources = \ sys/write-read-hole.c \ sys/truncate.c \ sys/unlink.c \ - sys/chdir.c + sys/chdir.c \ + sys/stat.c sys_sysio_gotcha_t_CPPFLAGS = $(test_cppflags) sys_sysio_gotcha_t_LDADD = $(test_gotcha_ldadd) @@ -184,9 +185,9 @@ unifyfs_unmount_t_LDADD = $(test_wrap_ldadd) unifyfs_unmount_t_LDFLAGS = $(test_wrap_ldflags) unifyfs_unmount_t_SOURCES = unifyfs_unmount.c -common_seg_tree_test_t_CPPFLAGS = $(test_cppflags) +common_seg_tree_test_t_CPPFLAGS = $(test_cppflags) $(MARGO_CFLAGS) common_seg_tree_test_t_LDADD = $(test_common_ldadd) -common_seg_tree_test_t_LDFLAGS = $(test_common_ldflags) +common_seg_tree_test_t_LDFLAGS = $(test_common_ldflags) $(MARGO_LIBS) common_seg_tree_test_t_SOURCES = \ common/seg_tree_test.c \ ../common/src/seg_tree.c \ diff --git a/t/sharness.d/00-test-env.sh b/t/sharness.d/00-test-env.sh index a61583511..bbe56882c 100644 --- a/t/sharness.d/00-test-env.sh +++ b/t/sharness.d/00-test-env.sh @@ -25,7 +25,7 @@ export UNIFYFS_TEST_RUN_SCRIPT=$UNIFYFS_BUILD_DIR/t/test_run_env.sh if test -n "$(which jsrun 2>/dev/null)"; then JOB_RUN_COMMAND="jsrun -r1 -n1" elif test -n "$(which srun 2>/dev/null)"; then - JOB_RUN_COMMAND="srun -n1 -N1" + JOB_RUN_COMMAND="srun -n1 -N1 --overlap" elif test -n "$(which mpirun 2>/dev/null)"; then JOB_RUN_COMMAND="mpirun -np 1" if [ $UID -eq 0 ]; then diff --git a/t/sys/stat.c b/t/sys/stat.c new file mode 100644 index 000000000..153b81d7b --- /dev/null +++ b/t/sys/stat.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * Copyright 2018, UT-Battelle, LLC. + * + * LLNL-CODE-741539 + * All rights reserved. 
+ * + * This is the license for UnifyFS. + * For details, see https://github.com/LLNL/UnifyFS. + * Please read https://github.com/LLNL/UnifyFS/LICENSE for full license text. + */ + + /* + * Test stat, lstat, and fstat + */ +#include +#include +#include +#include +#include +#include +#include "t/lib/tap.h" +#include "t/lib/testutil.h" + +int stat_test(char* unifyfs_root) +{ + diag("Starting UNIFYFS_WRAP(stat) tests"); + + char path[64]; + char dir_path[64]; + int err, fd, rc; + struct stat sb = {0}; + + testutil_rand_path(path, sizeof(path), unifyfs_root); + testutil_rand_path(dir_path, sizeof(dir_path), unifyfs_root); + + errno = 0; + rc = stat(path, &sb); + err = errno; + ok(rc == -1 && err == ENOENT, + "%s:%d stat() non-existent file fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + errno = 0; + rc = lstat(path, &sb); + err = errno; + ok(rc == -1 && err == ENOENT, + "%s:%d lstat() non-existent file fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + errno = 0; + fd = creat(path, 0222); + err = errno; + ok(fd != -1 && err == 0, "%s:%d creat(%s) (fd=%d): %s", + __FILE__, __LINE__, path, fd, strerror(err)); + + errno = 0; + rc = fsync(fd); + err = errno; + ok(rc == 0 && err == 0, "%s:%d fsync(): %s", + __FILE__, __LINE__, strerror(err)); + + errno = 0; + rc = close(fd); + err = errno; + ok(rc == 0 && err == 0, "%s:%d close(): %s", + __FILE__, __LINE__, strerror(err)); + + errno = 0; + rc = stat(path, &sb); + err = errno; + ok(rc == 0 && err == 0, "%s:%d stat(): %s", + __FILE__, __LINE__, strerror(err)); + + errno = 0; + rc = lstat(path, &sb); + err = errno; + ok(rc == 0 && err == 0, "%s:%d lstat(): %s", + __FILE__, __LINE__, strerror(err)); + + errno = 0; + rc = fstat(fd, &sb); + err = errno; + ok(rc == -1 && err == EBADF, + "%s:%d fstat() after close fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + errno = 0; + rc = unlink(path); + err = errno; + ok(rc == 0 && err == 0, + "%s:%d unlink() empty file: %s", + 
__FILE__, __LINE__, strerror(err)); + + errno = 0; + rc = stat(path, &sb); + err = errno; + ok(rc == -1 && err == ENOENT, + "%s:%d stat() after unlink fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + errno = 0; + rc = lstat(path, &sb); + err = errno; + ok(rc == -1 && err == ENOENT, + "%s:%d lstat() after unlink fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + errno = 0; + rc = fstat(fd, &sb); + err = errno; + ok(rc == -1 && err == EBADF, + "%s:%d fstat() after unlink fails (errno=%d): %s", + __FILE__, __LINE__, err, strerror(err)); + + diag("Finished UNIFYFS_WRAP(stat) tests"); + + return 0; +} diff --git a/t/sys/sysio_suite.c b/t/sys/sysio_suite.c index a2d5bbdbd..3f68d3c6c 100644 --- a/t/sys/sysio_suite.c +++ b/t/sys/sysio_suite.c @@ -107,6 +107,8 @@ int main(int argc, char* argv[]) chdir_test(unifyfs_root); + stat_test(unifyfs_root); + rc = unifyfs_unmount(); ok(rc == 0, "unifyfs_unmount(%s) (rc=%d)", unifyfs_root, rc); diff --git a/t/sys/sysio_suite.h b/t/sys/sysio_suite.h index c4feb98b1..265e09c45 100644 --- a/t/sys/sysio_suite.h +++ b/t/sys/sysio_suite.h @@ -74,4 +74,7 @@ int unlink_test(char* unifyfs_root); int chdir_test(char* unifyfs_root); +/* Test for UNIFYFS_WRAP(stat, lstat, fstat) */ +int stat_test(char* unifyfs_root); + #endif /* SYSIO_SUITE_H */ diff --git a/t/sys/unlink.c b/t/sys/unlink.c index 4f68768ef..961e5bb02 100644 --- a/t/sys/unlink.c +++ b/t/sys/unlink.c @@ -156,7 +156,7 @@ static int unlink_after_sync_laminate_test(char* unifyfs_root) int unlink_test(char* unifyfs_root) { - diag("Finished UNIFYFS_WRAP(unlink) tests"); + diag("Starting UNIFYFS_WRAP(unlink) tests"); char path[64]; char dir_path[64]; diff --git a/t/sys/write-read.c b/t/sys/write-read.c index 2c63d9c2a..dc5fcee22 100644 --- a/t/sys/write-read.c +++ b/t/sys/write-read.c @@ -360,8 +360,11 @@ int write_pre_existing_file_test(char* unifyfs_root) { diag("Starting write-to-pre-existing-file tests"); +#define TEST_LEN 300 +#define 
TEST_LEN_SHORT 100 +/* TEST_LEN_SHORT must be less than TEST_LEN */ char path[64]; - char buf[300] = {0}; + char buf[TEST_LEN] = {0}; int fd = -1; int err, rc; size_t global; @@ -374,13 +377,13 @@ int write_pre_existing_file_test(char* unifyfs_root) ok(fd != -1 && err == 0, "%s:%d open(%s) (fd=%d): %s", __FILE__, __LINE__, path, fd, strerror(err)); - /* Write 300 bytes to a file */ + /* Write TEST_LEN bytes to a file */ errno = 0; - rc = (int) write(fd, "a", 300); + rc = (int) write(fd, buf, TEST_LEN); err = errno; - ok(rc == 300 && err == 0, - "%s:%d write() a 300 byte file: %s", - __FILE__, __LINE__, strerror(err)); + ok(rc == TEST_LEN && err == 0, + "%s:%d write() a %d byte file: %s", + __FILE__, __LINE__, TEST_LEN, strerror(err)); errno = 0; rc = close(fd); @@ -388,10 +391,10 @@ int write_pre_existing_file_test(char* unifyfs_root) ok(rc == 0 && err == 0, "%s:%d close() worked: %s", __FILE__, __LINE__, strerror(err)); - /* Check global size is 300 */ + /* Check global size is correct */ testutil_get_size(path, &global); - ok(global == 300, "%s:%d global size of 300 byte file is %zu: %s", - __FILE__, __LINE__, global, strerror(err)); + ok(global == TEST_LEN, "%s:%d global size of %d byte file is %zu: %s", + __FILE__, __LINE__, TEST_LEN, global, strerror(err)); /* Reopen the same file */ errno = 0; @@ -400,13 +403,13 @@ int write_pre_existing_file_test(char* unifyfs_root) ok(fd != -1 && err == 0, "%s:%d open(%s) (fd=%d): %s", __FILE__, __LINE__, path, fd, strerror(err)); - /* Overwrite the first 100 bytes of same file */ + /* Overwrite the first part of same file */ errno = 0; - rc = (int) write(fd, buf, 100); + rc = (int) write(fd, buf, TEST_LEN_SHORT); err = errno; - ok(rc == 100 && err == 0, - "%s:%d overwrite first 100 bytes of same file: %s", - __FILE__, __LINE__, strerror(err)); + ok(rc == TEST_LEN_SHORT && err == 0, + "%s:%d overwrite first %d bytes of same file: %s", + __FILE__, __LINE__, TEST_LEN_SHORT, strerror(err)); errno = 0; rc = close(fd); @@ 
-414,10 +417,10 @@ int write_pre_existing_file_test(char* unifyfs_root) ok(rc == 0 && err == 0, "%s:%d close() worked: %s", __FILE__, __LINE__, strerror(err)); - /* Check global size is 300 */ + /* Check global size is still correct */ testutil_get_size(path, &global); - ok(global == 300, "%s:%d global size of 300 byte file is %zu: %s", - __FILE__, __LINE__, global, strerror(err)); + ok(global == TEST_LEN, "%s:%d global size of %d byte file is %zu: %s", + __FILE__, __LINE__, TEST_LEN, global, strerror(err)); errno = 0; rc = unlink(path); diff --git a/util/unifyfs-stage/src/unifyfs-stage-transfer.c b/util/unifyfs-stage/src/unifyfs-stage-transfer.c index 7c3914a16..d9dda46f0 100644 --- a/util/unifyfs-stage/src/unifyfs-stage-transfer.c +++ b/util/unifyfs-stage/src/unifyfs-stage-transfer.c @@ -25,7 +25,8 @@ #include #include -#include +#include // still needed for the MD5_DIGEST_LENGTH define +#include #include "unifyfs-stage.h" @@ -163,8 +164,9 @@ static int md5_checksum(unifyfs_stage* ctx, unifyfs_rc urc; size_t len = 0; off_t file_offset; - MD5_CTX md5; + EVP_MD_CTX* md5; unsigned char data[UNIFYFS_STAGE_MD5_BLOCKSIZE]; + unsigned int digest_len = UNIFYFS_STAGE_MD5_BLOCKSIZE; if (is_unify_file) { fd = -1; @@ -186,10 +188,12 @@ static int md5_checksum(unifyfs_stage* ctx, } } - /* NOTE: MD5_xxxx() returns 1 for success */ - md5_rc = MD5_Init(&md5); + /* NOTE: EVP_DigestInit_ex() returns 1 for success */ + md5 = EVP_MD_CTX_create(); + md5_rc = EVP_DigestInit_ex(md5, EVP_md5(), NULL); if (md5_rc != 1) { - fprintf(stderr, "UNIFYFS-STAGE ERROR: failed to create MD5 context\n"); + fprintf(stderr, + "UNIFYFS-STAGE ERROR: failed to initialize MD5 context\n"); ret = EIO; } else { file_offset = 0; @@ -208,7 +212,7 @@ static int md5_checksum(unifyfs_stage* ctx, break; } else if (len) { file_offset += (off_t) len; - md5_rc = MD5_Update(&md5, data, len); + md5_rc = EVP_DigestUpdate(md5, data, len); if (md5_rc != 1) { fprintf(stderr, "UNIFYFS-STAGE ERROR: " "MD5 checksum update 
failed\n"); @@ -218,11 +222,13 @@ static int md5_checksum(unifyfs_stage* ctx, } } while (len != 0); - md5_rc = MD5_Final(digest, &md5); + md5_rc = EVP_DigestFinal_ex(md5, data, &digest_len); if (md5_rc != 1) { fprintf(stderr, "UNIFYFS-STAGE ERROR: failed to finalize MD5\n"); ret = EIO; } + + EVP_MD_CTX_destroy(md5); } if (-1 != fd) { diff --git a/util/unifyfs/src/unifyfs-rm.c b/util/unifyfs/src/unifyfs-rm.c index f6d347e81..36e5a5260 100644 --- a/util/unifyfs/src/unifyfs-rm.c +++ b/util/unifyfs/src/unifyfs-rm.c @@ -617,6 +617,102 @@ static int slurm_read_resource(unifyfs_resource_t* resource) return ret; } +/** + * + * @brief Get list of nodes using Flux resource + * + * @param resource The job resource record to be filled + * + * @return 0 on success, negative errno otherwise + */ +static int flux_read_resource(unifyfs_resource_t* resource) +{ + size_t n_nodes = 0; + char num_nodes_str[10] = {0}; + char nodelist_str[1024] = {0}; + char* ret = NULL; + FILE* pipe_fp = NULL; + char** nodes = NULL; + int node_idx = 0; + + // get num nodes using flux resource command + pipe_fp = popen("flux resource list --states=free -no {nnodes}", "r"); + ret = fgets(num_nodes_str, sizeof(num_nodes_str), pipe_fp); + if (ret == NULL) { + pclose(pipe_fp); + return -EINVAL; + } + n_nodes = (size_t) strtoul(num_nodes_str, NULL, sizeof(num_nodes_str)); + pclose(pipe_fp); + + nodes = calloc(sizeof(char*), n_nodes); + if (nodes == NULL) { + return -ENOMEM; + } + + // get node list using flux resource command + // the returned list is in a condensed format + // e.g., tioga[18-19,21,32] + // TODO: is it safe to assume flux resource only + // returns a single line? 
+ pipe_fp = popen("flux resource list --states=free -no {nodelist}", "r"); + ret = fgets(nodelist_str, sizeof(nodelist_str), pipe_fp); + if (ret == NULL) { + pclose(pipe_fp); + return -EINVAL; + } + pclose(pipe_fp); + + // get the node ids, i.e., the list in [] + char* node_ids = strstr(nodelist_str, "["); + if (node_ids) { + // remove the trailing "]\n" + nodelist_str[strlen(nodelist_str)-2] = 0; + char* host = calloc(1, (node_ids-nodelist_str)+2); + strncpy(host, nodelist_str, (node_ids-nodelist_str)); + + // separate by "," + char* end_str; + char* token = strtok_r(node_ids+1, ",", &end_str); + while (token) { + // case 1: contiguous node range + // e.g., 3-5, lo=3, hi=5 + // case 2: a single node, then lo=hi + int lo, hi; + if (strstr(token, "-")) { + char* end_str2; + char* lo_str = strtok_r(token, "-", &end_str2); + char* hi_str = strtok_r(NULL, "-", &end_str2); + lo = atoi(lo_str); + hi = atoi(hi_str); + } else { + lo = atoi(token); + hi = lo; + } + + for (int i = lo; i <= hi; i++) { + char nodename[30] = {0}; + sprintf(nodename, "%s%d", host, i); + if (node_idx >= n_nodes) { + return -EINVAL; + } + nodes[node_idx++] = strdup(nodename); + } + token = strtok_r(NULL, ",", &end_str); + } + } else { + // no '[' in the string, meaning it has a single node + if (n_nodes != 1) { + return -EINVAL; + } + nodes[0] = strdup(nodelist_str); + } + + resource->n_nodes = n_nodes; + resource->nodes = nodes; + return 0; +} + // construct_server_argv(): // This function is called in two ways. 
// Call it once with server_argv==NULL and it @@ -1143,6 +1239,107 @@ static int srun_stage(unifyfs_resource_t* resource, return -errno; } +/** + * @brief Launch servers using Flux + * + * @param resource The job resource record + * @param args The command-line options + * + * @return + */ +static int flux_launch(unifyfs_resource_t* resource, + unifyfs_args_t* args) +{ + size_t argc, flux_argc, server_argc; + char** argv = NULL; + char n_nodes[16]; + char n_tasks[16]; + char n_cores[8]; + + snprintf(n_nodes, sizeof(n_nodes), "-N%zu", resource->n_nodes); + // without -n, --ntasks, Flux will schedule the server job + // to use all nodes exclusively + snprintf(n_tasks, sizeof(n_tasks), "-n%zu", resource->n_nodes); + snprintf(n_cores, sizeof(n_cores), "-c%d", resource->n_cores_per_server); + + // full command: flux run + flux_argc = 5; + server_argc = construct_server_argv(args, NULL); + + // setup full command argv + argc = 1 + flux_argc + server_argc; + argv = calloc(argc, sizeof(char*)); + argv[0] = strdup("flux"); + argv[1] = strdup("run"); + argv[2] = strdup(n_nodes); + argv[3] = strdup(n_tasks); + argv[4] = strdup(n_cores); + construct_server_argv(args, argv + flux_argc); + + if (args->debug) { + for (int i = 0; i < (argc - 1); i++) { + fprintf(stdout, "UNIFYFS LAUNCH DEBUG: flux argv[%d] = %s\n", + i, argv[i]); + fflush(stdout); + } + } + + execvp(argv[0], argv); + perror("failed to execvp() flux to launch unifyfsd"); + return -errno; +} + +/** + * @brief Terminate servers using Flux + * + * @param resource The job resource record + * @param args The command-line options + * + * @return + */ +static int flux_terminate(unifyfs_resource_t* resource, + unifyfs_args_t* args) +{ + size_t argc, flux_argc; + char** argv = NULL; + + // full command: flux pkill name:unifyfsd + flux_argc = 3; + argc = 1 + flux_argc; + argv = calloc(argc, sizeof(char*)); + argv[0] = strdup("flux"); + argv[1] = strdup("pkill"); + argv[2] = strdup("name:unifyfsd"); + + execvp(argv[0], argv); +
perror("failed to execvp() flux to pkill unifyfsd"); + return -errno; +} + +/** + * @brief Launch unifyfs-stage using flux run + * + * @param resource The job resource record + * @param args The command-line options + * + * @return + */ +static int flux_stage(unifyfs_resource_t* resource, + unifyfs_args_t* args) +{ + size_t flux_argc = 5; + char cmd[200]; + + // full command: flux run + snprintf(cmd, sizeof(cmd), "flux run -N%zu -n%zu -c1", + resource->n_nodes, resource->n_nodes); + + generic_stage(cmd, flux_argc, args); + + perror("failed to execvp() flux to launch unifyfsd"); + return -errno; +} + /** * @brief Launch servers using custom script * @@ -1249,6 +1446,13 @@ static _ucr_resource_manager_t resource_managers[] = { .terminate = &jsrun_terminate, .stage = &jsrun_stage, }, + { + .type = "flux", + .read_resource = &flux_read_resource, + .launch = &flux_launch, + .terminate = &flux_terminate, + .stage = &flux_stage, + }, }; int unifyfs_detect_resources(unifyfs_resource_t* resource) @@ -1257,6 +1461,11 @@ int unifyfs_detect_resources(unifyfs_resource_t* resource) resource->rm = UNIFYFS_RM_PBS; } else if (getenv("SLURM_JOBID") != NULL) { resource->rm = UNIFYFS_RM_SLURM; + } else if (getenv("FLUX_EXEC_PATH") != NULL) { + // TODO: need to use a better environment + // variable or maybe a better way to decide + // whether to use flux scheduler. + resource->rm = UNIFYFS_RM_FLUX; } else if (getenv("LSB_JOBID") != NULL) { if (getenv("CSM_ALLOCATION_ID") != NULL) { resource->rm = UNIFYFS_RM_LSF_CSM;