diff --git a/.bazelrc b/.bazelrc index 84a512b46f..e8fe867ba7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -46,6 +46,13 @@ build --spawn_strategy=standalone build --genrule_strategy=standalone build --define=grpc_no_ares=true +build --define=MEDIAPIPE_DISABLE_GPU=1 +coverage --define=MEDIAPIPE_DISABLE_GPU=1 +test --define=MEDIAPIPE_DISABLE_GPU=1 + +build --define=MEDIAPIPE_DISABLE=0 +coverage --define=MEDIAPIPE_DISABLE=0 +test --define=MEDIAPIPE_DISABLE=0 # Sets the default Apple platform to macOS. build --apple_platform_type=macos @@ -65,6 +72,10 @@ build --cxxopt=-fno-strict-overflow build --cxxopt=-fno-delete-null-pointer-checks build --cxxopt=-fwrapv build --cxxopt=-fstack-protector +build --cxxopt=-fstack-clash-protection +build --cxxopt=-Wformat +build --cxxopt=-Wformat-security +build --cxxopt=-Werror=format-security # Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF # compilation options. It also addresses memory use due to @@ -75,3 +86,5 @@ build --experimental_repo_remote_exec build --force_pic build --experimental_cc_shared_library +build --check_visibility=true + diff --git a/.gitignore b/.gitignore index 93a01ebb1b..aae49118fb 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ lib/ !client/python/ovmsclient/lib cppclean_src cppclean_test +tags diff --git a/BUILD.bazel b/BUILD.bazel new file mode 100644 index 0000000000..53072b64f2 --- /dev/null +++ b/BUILD.bazel @@ -0,0 +1,78 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +licenses(["notice"]) + +exports_files([ + "package.json", + "yarn.lock", +]) + +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@mediapipe//mediapipe/framework:more_selects.bzl", "more_selects") + +config_setting( + name = "disable_mediapipe", + define_values = { + "MEDIAPIPE_DISABLE": "1", + }, + visibility = ["//visibility:public"], +) + +more_selects.config_setting_negation( + name = "not_disable_mediapipe", + negate = ":disable_mediapipe", +) + +cc_library( + name = "ovms_dependencies", + deps = [ + "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto", + "@tensorflow_serving//tensorflow_serving/apis:model_service_cc_proto", + "@minitrace//:trace", + "@com_github_grpc_grpc//:grpc++", + "@org_tensorflow//tensorflow/core:framework", + "@com_github_tencent_rapidjson//:rapidjson", + "@com_github_gabime_spdlog//:spdlog", + "@com_github_jarro2783_cxxopts//:cxxopts", + "@awssdk//:s3", + "@awssdk//:core", + "@awssdk//:deps", + "@azure//:storage", + "@cpprest//:sdk", + "@boost//:lib", + "@com_github_googleapis_google_cloud_cpp//google/cloud/storage:storage_client", + "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server", + "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server_api", + "@tensorflow_serving//tensorflow_serving/util:threadpool_executor", + "@tensorflow_serving//tensorflow_serving/util:json_tensor", + "@linux_openvino//:openvino", + "@linux_opencv//:opencv", + "@com_github_jupp0r_prometheus_cpp//core", + "@oneTBB//:tbb", + ] + select({ + "//conditions:default": [ + "@mediapipe//mediapipe/framework:calculator_framework", + "@mediapipe//mediapipe/framework/port:logging", + "@mediapipe//mediapipe/framework/port:parse_text_proto", + "@mediapipe//mediapipe/framework/port:status", + "@mediapipe_calculators//:mediapipe_calculators", + "@model_api//:adapter_api", + ], + "//:disable_mediapipe" : [], + }), + visibility = ["//visibility:public"], +) diff --git a/Dockerfile.redhat b/Dockerfile.redhat index 8d58ddea4a..c4d4c6e42f 100644 --- a/Dockerfile.redhat +++ b/Dockerfile.redhat @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
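The `config_setting`/`select()` pair above is what the new `.bazelrc` defines feed into: with the default `MEDIAPIPE_DISABLE=0` the MediaPipe framework, calculators, and Model API adapter are linked into `ovms_dependencies`, and flipping the define drops them. A hand-run sketch of both paths follows; the flag spelling matches what the Makefile appends later in this patch, but the bare invocations themselves are illustrative, not part of the patch:

```bash
# Default from the new .bazelrc entries: MediaPipe enabled, GPU support disabled.
bazel build //:ovms_dependencies

# Hypothetical MediaPipe-free variant, using the same flags the Makefile appends
# to BAZEL_DEBUG_FLAGS when MEDIAPIPE_DISABLE=1:
bazel build --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 //:ovms_dependencies
```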
@@ -22,6 +22,9 @@ LABEL version="1.0.0" SHELL ["/bin/bash", "-xo", "pipefail", "-c"] ARG JOBS +COPY entitlement /etc/pki/entitlement +COPY rhsm-ca /etc/rhsm/ca +RUN rm -f /etc/rhsm-host RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && dnf clean all && yum update -d6 -y && yum install -d6 -y \ boost169-atomic \ @@ -127,19 +130,73 @@ RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.n unzip \ vim \ xz \ - https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \ + https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm \ + http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/libusb-devel-0.1.5-12.el8.x86_64.rpm \ + http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusb-0.1.5-12.el8.x86_64.rpm \ + http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusbx-devel-1.0.23-4.el8.x86_64.rpm && \ yum clean all +ARG NVIDIA=0 +# Add Nvidia dev tool if needed +# hadolint ignore=DL3003 +RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ + yum config-manager --save --set-enabled codeready-builder-for-rhel-8-x86_64-rpms && \ + yum -y module disable python36 && \ + yum install -y \ + libzstd-devel \ + libcudnn8-8.6.0.163-1.cuda11.8 \ + libcudnn8-devel-8.6.0.163-1.cuda11.8 \ + libcutensor1-1.6.1.5-1 \ + libcutensor-devel-1.6.1.5-1 \ + cuda-cudart-devel-11-8 && \ + # ignore errors on hosts with older nvidia drivers + yum install -y cuda-11-8 || true && \ + yum install -y python38-Cython && \ + curl -L https://github.com/Kitware/ninja/releases/download/v1.10.0.gfb670.kitware.jobserver-1/ninja-1.10.0.gfb670.kitware.jobserver-1_x86_64-linux-gnu.tar.gz | tar xzv --strip-components=1 -C /usr/local/bin && \ + curl https://github.com/mozilla/sccache/releases/download/v0.2.15/sccache-v0.2.15-x86_64-unknown-linux-musl.tar.gz -L | tar xvzC /usr/local/bin --strip-components=1 --wildcards '*/sccache' && \ + chmod a+x /usr/local/bin/sccache && \ + curl https://github.com/Kitware/CMake/releases/download/v3.24.0/cmake-3.24.0-linux-x86_64.tar.gz -L | tar xzvC /usr/local --exclude={doc,man} --strip-components=1 && \ + curl -L https://github.com/ccache/ccache/releases/download/v4.3/ccache-4.3.tar.xz | tar xJv && \ + mkdir -p ccache-4.3/build && cd ccache-4.3/build && \ + cmake -DCMAKE_BUILD_TYPE=Release -G Ninja .. 
&& \ + ninja -v install && \ + rm -rf /var/cache/yum + # build_type=[ opt, dbg ] ARG build_type=dbg -ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg ARG minitrace_flags ENV TF_SYSTEM_LIBS="curl" ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log" - +ARG ov_source_branch=master +ARG ov_contrib_branch=master +ARG sentencepiece=0 +ARG ov_source_org=openvinotoolkit +ARG ov_contrib_org=openvinotoolkit ARG ov_use_binary=1 ARG DLDT_PACKAGE_URL ARG TEMP_DIR=/tmp/openvino_installer +ARG CMAKE_BUILD_TYPE=Release + +# hadolint ignore=DL3003 +RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive + +################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################ +# Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1 +# hadolint ignore=DL3003 +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive +WORKDIR /openvino/build +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; dnf install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm && dnf clean all +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " .. 
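Every optional stage in this Dockerfile (the NVIDIA tooling, the OpenVINO source build, and later the SentencePiece extension) reuses the same shell guard, so a skipped stage still produces a successful no-op layer. A minimal standalone sketch of the idiom, with a hypothetical flag value:

```bash
#!/bin/bash
# Hypothetical reproduction of the guard used by the conditional RUN steps above.
NVIDIA=0   # normally injected with --build-arg NVIDIA=...

# When the flag is not "1", `exit 0` ends the shell successfully, so Docker
# records the RUN layer as completed without doing any of the optional work.
if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; echo "installing the CUDA toolchain here"
```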
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make install +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \ + mkdir -p /opt/intel/openvino/extras && \ + mkdir -p /opt/intel/openvino && \ + ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \ + ln -s /usr/local/runtime /opt/intel/openvino && \ + ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \ + ln -s /opt/intel/openvino /opt/intel/openvino_2023 +################## END OF OPENVINO SOURCE BUILD ###################### ################### TAKE OPENVINO FROM A BINARY RELEASE - buildarg ov_use_binary=1 (DEFAULT) ########## WORKDIR $TEMP_DIR @@ -149,7 +206,40 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023 + +# install sample apps including benchmark_app +RUN yum install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-devel-2.2.2-1.el8.x86_64.rpm \ + http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-2.2.2-1.el8.x86_64.rpm \ + https://download-ib01.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/j/json-devel-3.6.1-2.el8.x86_64.rpm && \ + rm -rf /var/cache/yum +RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi +#################### END OF OPENVINO BINARY INSTALL + +# SENTENCEPIECE_EXTENSION +ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake +WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions +RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . 
--parallel $JOBS + +# NVIDIA +ENV OPENVINO_BUILD_PATH=/cuda_plugin_build +ENV OPENVINO_HOME=/openvino +ENV OPENVINO_CONTRIB=/openvino_contrib + +# hadolint ignore=DL3003 +RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ + mkdir "${OPENVINO_BUILD_PATH}" && \ + cd "${OPENVINO_BUILD_PATH}" && \ + cmake "${OPENVINO_HOME}" \ + -DENABLE_NVIDIA=ON \ + -DENABLE_TESTS=ON \ + -DBUILD_arm_plugin=OFF \ + -DBUILD_java_api=OFF \ + -DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB}"/modules \ + -DWHEEL_VERSION=2022.1.0 \ + -DVERBOSE_BUILD=ON \ + -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" && \ + cmake --build "${OPENVINO_BUILD_PATH}" --target openvino_nvidia_gpu_plugin -j "$JOBS" # Build OpenVINO Model Server WORKDIR /ovms @@ -174,6 +264,14 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT RUN yum install -y https://github.com/linux-test-project/lcov/releases/download/v1.16/lcov-1.16-1.noarch.rpm && yum clean all WORKDIR /ovms + +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/ + +ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg +RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \ + sed -i -e 's|3.19.1|3.9.2|g' WORKSPACE && \ + sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE + # hadolint ignore=DL3059 RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework @@ -181,7 +279,13 @@ RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/co # hadolint ignore=SC2046 RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch -RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto +# Mediapipe +COPY BUILD.bazel /ovms/ +COPY yarn.lock /ovms/ +COPY package.json /ovms/ + +# prebuild dependencies before copying sources +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies # hadolint ignore=DL3059 RUN cp -v /etc/ssl/certs/ca-bundle.crt /etc/ssl/certs/ca-certificates.crt @@ -199,7 +303,7 @@ RUN make WORKDIR /ovms -ARG PROJECT_VERSION="2022.3" +ARG PROJECT_VERSION="2023.0" ARG PROJECT_NAME="OpenVINO Model Server" LABEL description=${PROJECT_NAME} @@ -207,43 +311,44 @@ LABEL description=${PROJECT_NAME} RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/version.hpp" RUN if [ "$build_type" = "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ; RUN if [ "$ov_use_binary" = "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp - -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/ +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp # Test Coverage -COPY check_coverage.bat /ovms/ +COPY ci/check_coverage.bat /ovms/ ARG 
CHECK_COVERAGE=0 + ARG RUN_TESTS=1 -RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \ - bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ; +RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \ + bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ; # C api shared library -RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared +RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:ovms_shared # C api app with bazel # hadolint ignore=DL3059 -RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example +RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example # C-API benchmark app -RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]" +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]" # OVMS -RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms # hadolint ignore=DL3059 -RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so # C-api C/C++ app with gcc -COPY MakefileCapi . 
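The test and build steps above now pass the same `${debug_bazel_flags}` to every `bazel` invocation and exclude the MediaPipe stress suite through a test filter. Run by hand inside the build image, a rough equivalent looks like this; the flag set shown is the default `dbg` value and `--jobs=8` stands in for the `$JOBS` build arg, so treat it as a sketch:

```bash
# Mirror of the test step above: skip the MediaPipe stress cases.
debug_bazel_flags='--strip=never --copt=-g -c dbg'
bazel test --jobs=8 ${debug_bazel_flags} --test_timeout=1800 \
    --test_summary=detailed --test_output=streamed \
    --test_filter=-*Stress*Mediapipe* //src:ovms_test

# Main binary and the C-API shared library built with the same flag set.
bazel build --jobs=8 ${debug_bazel_flags} //src:ovms //src:ovms_shared
```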
-RUN make -f MakefileCapi cpp && make -f MakefileCapi c +COPY MakefileCapi /ovms/ +RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \ + make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" ARG ovms_metadata_file COPY ${ovms_metadata_file} metadata.json -RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ; RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/ COPY release_files/LICENSE /ovms/release_files/LICENSE COPY client /client COPY demos /demos +RUN rm -Rf /etc/entitlement /etc/rhsm/ca diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu index 2629dd2f5c..7ad5c0238f 100644 --- a/Dockerfile.ubuntu +++ b/Dockerfile.ubuntu @@ -24,14 +24,10 @@ SHELL ["/bin/bash", "-xo", "pipefail", "-c"] ARG JOBS ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install --no-install-recommends -y \ - libboost-atomic1.71.0 \ - libboost-chrono1.71.0 \ - libboost-filesystem1.71.0 \ - libboost-program-options1.71.0 \ - libboost-thread1.71.0 \ - libboost-system1.71.0 \ - libboost-date-time1.71.0 \ build-essential \ + gcc-9 \ + g++-9 \ + make \ cmake \ automake \ autoconf \ @@ -45,6 +41,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ pkg-config \ wget \ zlib1g-dev && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -52,7 +50,9 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ WORKDIR /boost # hadolint ignore=DL3003 RUN wget -nv https://sourceforge.net/projects/boost/files/boost/1.69.0/boost_1_69_0.tar.gz && \ -tar xvf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \ +tar xf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \ +sed -i -e 's|#if PTHREAD_STACK_MIN > 0|#ifdef PTHREAD_STACK_MIN|g' boost/thread/pthread/thread_data.hpp && \ +# fix for compiler >=9.5 https://github.com/boostorg/thread/pull/297/files ./b2 -j ${JOBS} cxxstd=17 link=static cxxflags='-fPIC' cflags='-fPIC' \ --with-chrono --with-date_time --with-filesystem --with-program_options --with-system \ --with-random --with-thread --with-atomic --with-regex \ @@ -73,13 +73,14 @@ WORKDIR /azure/cpprestsdk/Release/build.release RUN cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DWERROR=OFF -DBUILD_SAMPLES=OFF -DBUILD_TESTS=OFF && make --jobs=$JOBS install WORKDIR /azure/azure-storage-cpp/Microsoft.WindowsAzure.Storage/build.release -RUN CASABLANCA_DIR=/azure/cpprestsdk cmake .. -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DCMAKE_VERBOSE_MAKEFILE=ON && make --jobs=$JOBS && make --jobs=$JOBS install +RUN CASABLANCA_DIR=/azure/cpprestsdk cmake .. 
-DCMAKE_CXX_FLAGS="-fPIC -Wno-error=deprecated-declarations" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DCMAKE_VERBOSE_MAKEFILE=ON && make --jobs=$JOBS && make --jobs=$JOBS install +# no-error flag related to https://github.com/aws/aws-sdk-cpp/issues/1582 ####### End of Azure SDK # Build AWS S3 SDK RUN git clone https://github.com/aws/aws-sdk-cpp.git --branch 1.7.129 --single-branch --depth 1 /awssdk WORKDIR /awssdk/build -RUN cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY=s3 -DENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DMINIMIZE_SIZE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFORCE_SHARED_CRT=OFF -DSIMPLE_INSTALL=OFF -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 " .. && make --jobs=$JOBS +RUN cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY=s3 -DENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DMINIMIZE_SIZE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFORCE_SHARED_CRT=OFF -DSIMPLE_INSTALL=OFF -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized" .. && make --jobs=$JOBS ####### End of AWS S3 SDK @@ -109,23 +110,24 @@ RUN apt-get update && apt-get install --no-install-recommends -y \ nlohmann-json3-dev \ python2 \ python2-dev \ - python-setuptools \ - python3 \ - python3-pip \ - python3-dev \ - python-is-python3 \ - python3-setuptools \ - python3-virtualenv \ - python3-numpy \ - python-is-python3 \ unzip \ vim \ xz-utils && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y software-properties-common gpg gpg-agent --no-install-recommends && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get install -y python3.8 python3.8-dev python3.8-distutils python3-pip --no-install-recommends && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 && \ + python3 -m pip install numpy==1.21.0 --no-cache-dir + ARG JOBS ARG ov_use_binary=1 +ARG sentencepiece=0 +ARG ov_source_org=openvinotoolkit +ARG ov_contrib_org=openvinotoolkit ARG DLDT_PACKAGE_URL ARG ov_source_branch=master ARG ov_contrib_branch=master @@ -134,17 +136,18 @@ ARG CMAKE_BUILD_TYPE=Release # build_type=[ opt, dbg ] ARG build_type=opt -ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg ARG minitrace_flags ENV HDDL_INSTALL_DIR=/opt/intel/openvino/deployment_tools/inference_engine/external/hddl ENV TF_SYSTEM_LIBS="curl" ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log" +ARG NVIDIA=0 +# hadolint ignore=DL3003 +RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive ################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################ # Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1 - # hadolint ignore=DL3003 -RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/openvinotoolkit/openvino /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive +RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive WORKDIR /openvino/build 
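The `sentencepiece`, `ov_source_org`, and `ov_contrib_org` build args declared above are normally supplied by the Makefile targets changed later in this patch. For reference, a trimmed manual invocation could look like the following; only the arguments relevant to these changes are listed, so it is not a complete build command:

```bash
# Illustrative subset only - the Makefile also passes JOBS, proxies, metadata, and bazel flags.
docker build -f Dockerfile.ubuntu . \
    --build-arg ov_use_binary=1 \
    --build-arg sentencepiece=1 \
    --build-arg NVIDIA=0 \
    --build-arg ov_source_org=openvinotoolkit \
    --build-arg ov_contrib_org=openvinotoolkit
```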
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " .. RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS @@ -155,7 +158,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \ ln -s /openvino/inference-engine/temp/opencv_*_ubuntu20/opencv /opt/intel/openvino/extras && \ ln -s /usr/local/runtime /opt/intel/openvino && \ ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \ - ln -s /opt/intel/openvino /opt/intel/openvino_2022 + ln -s /opt/intel/openvino /opt/intel/openvino_2023 ################## END OF OPENVINO SOURCE BUILD ###################### ARG TEMP_DIR=/tmp/openvino_installer @@ -168,7 +171,7 @@ WORKDIR $TEMP_DIR # chmod 755 l_openvino_toolkit_* && \ # ./l_openvino_toolkit_* -a -s --eula accept && \ # rm -rf /opt/intel/openvino && \ -# ln -s /opt/intel/openvino_2022 /opt/intel/openvino +# ln -s /opt/intel/openvino_2023 /opt/intel/openvino # OV toolkit package RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; else exit 0 ; fi ; \ @@ -176,7 +179,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023 WORKDIR / # apt package @@ -188,18 +191,27 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" = "" ] ; then true ; apt-get update && \ apt-get install --no-install-recommends -y $APT_OV_PACKAGE && \ rm -rf /var/lib/apt/lists/* && \ - ln -s /opt/intel/openvino_2022 /opt/intel/openvino + ln -s /opt/intel/openvino_2023 /opt/intel/openvino + +RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/oneapi-tbb-2021.9.0-lin.tgz && \ + tar -xzf oneapi-tbb-2021.9.0-lin.tgz && \ + cp oneapi-tbb-2021.9.0/lib/intel64/gcc4.8/libtbb.so* /opt/intel/openvino/runtime/lib/intel64/ # install sample apps including benchmark_app RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi #################### END OF OPENVINO BINARY INSTALL +# SENTENCEPIECE_EXTENSION +ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake + +WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions +RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . 
--parallel $JOBS + # NVIDIA ENV OPENVINO_BUILD_PATH=/cuda_plugin_build ENV OPENVINO_HOME=/openvino ENV OPENVINO_CONTRIB=/openvino_contrib -ARG NVIDIA=0 # Add Nvidia dev tool if needed # hadolint ignore=DL3003 RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ @@ -223,7 +235,6 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ # hadolint ignore=DL3003 RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ - git clone https://github.com/openvinotoolkit/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive && \ mkdir ${OPENVINO_BUILD_PATH} && \ cd "${OPENVINO_BUILD_PATH}" && \ cmake "${OPENVINO_HOME}" \ @@ -231,6 +242,10 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \ -DENABLE_TESTS=ON \ -DBUILD_arm_plugin=OFF \ -DBUILD_java_api=OFF \ + -DBUILD_custom_operations=OFF \ + -DBUILD_mo_pytorch=OFF \ + -DBUILD_optimum=OFF \ + -DBUILD_ovms_ai_extension=OFF \ -DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB}/modules" \ -DWHEEL_VERSION=2022.1.0 \ -DVERBOSE_BUILD=ON \ @@ -250,7 +265,13 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT WORKDIR /ovms COPY .bazelrc WORKSPACE /ovms/ COPY external /ovms/external/ -COPY MakefileCapi /ovms/ + +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/ + +ARG debug_bazel_flags=--strip=never\ --copt="-g "\ -c\ dbg +RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \ + sed -i -e 's|3.19.1|3.9.2|g' WORKSPACE && \ + sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework @@ -262,7 +283,13 @@ COPY third_party /ovms/third_party/ # hadolint ignore=SC2046 RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch -RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto +# Mediapipe +COPY BUILD.bazel /ovms/ +COPY yarn.lock /ovms/ +COPY package.json /ovms/ + +# prebuild dependencies before copying sources +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies # Copy example clients into build image for static analysis WORKDIR /example_cpp_client @@ -286,38 +313,40 @@ RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/vers RUN if [ "$build_type" == "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ; RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp -ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/ # Test Coverage -COPY check_coverage.bat /ovms/ +COPY ci/check_coverage.bat /ovms/ ARG 
CHECK_COVERAGE=0 + ARG RUN_TESTS=1 -RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \ - bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ; +RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then \ + bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \ + bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ; # C api shared library -RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:ovms_shared # C api app with bazel # hadolint ignore=DL3059 -RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example +RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example # C-API benchmark app -RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]" +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]" # OVMS -RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms # hadolint ignore=DL3059 -RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so +RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so # C-api C/C++ app with gcc -RUN make -f MakefileCapi cpp && make -f MakefileCapi c +COPY MakefileCapi /ovms/ +RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \ + make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" ARG ovms_metadata_file COPY ${ovms_metadata_file} metadata.json -RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ; RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/ diff --git a/DockerfileMakePackage b/DockerfileMakePackage index 37164dd535..351ad5c66e 100644 --- a/DockerfileMakePackage +++ b/DockerfileMakePackage @@ -21,6 +21,7 @@ FROM $BUILD_IMAGE ARG BASE_OS=ubuntu ARG ov_use_binary=1 ARG NVIDIA=0 +ARG sentencepiece=0 SHELL ["/bin/bash", "-c"] RUN mkdir /patchelf && cd /patchelf && \ @@ -28,28 +29,30 @@ RUN mkdir /patchelf && cd /patchelf && \ tar -xf 0.10.tar.gz && ls -lah && cd */ && \ ./bootstrap.sh && ./configure && make && make 
install -RUN mkdir -vp /ovms_release/bin -RUN mkdir -vp /ovms_release/deps -RUN mkdir -vp /ovms_release/lib -RUN mkdir -vp /ovms_release/lib/hddl/config -RUN mkdir -vp /ovms_release/lib/custom_nodes +RUN mkdir -vp /ovms_release/bin && \ + mkdir -vp /ovms_release/deps && \ + mkdir -vp /ovms_release/lib && \ + mkdir -vp /ovms_release/lib/hddl/config && \ + mkdir -vp /ovms_release/lib/custom_nodes RUN if [ -d /ovms/src/custom_nodes/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/ +RUN if [ -d /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/ RUN cp /ovms/metadata.json /ovms_release/ +RUN if [ "$sentencepiece" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino_contrib/modules/custom_operations/user_ie_extensions/user_ie_extensions/libuser_ov_extensions.so /ovms_release/lib/ RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/libopenvino_nvidia_gpu_plugin.so /ovms_release/lib/ -RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/plugins.xml /ovms_release/lib/ +RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; echo '' > /ovms_release/lib/plugins.xml RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/3rdparty/hddl/config/* /ovms_release/lib/hddl/config/ || true RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -vr /opt/intel/openvino/runtime/3rdparty/hddl/etc/* /ovms_release/lib/hddl/etc/ || true -RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/lib/intel64/plugins.xml /ovms_release/lib/ && cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/ +RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/ RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; rm -vrf /ovms_release/deps/*-devel-* RUN find /ovms/bazel-out/k8-*/bin -iname '*.so*' -exec cp -v {} /ovms_release/lib/ \; RUN cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5 RUN cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10 -RUN rm -f /ovms_release/lib/libssl.so -RUN rm -f /ovms_release/lib/libsampleloader* -RUN rm -f /ovms_release/lib/lib_node* -RUN rm -f /ovms_release/lib/libcustom_node* +RUN rm -f /ovms_release/lib/libssl.so && \ + rm -f /ovms_release/lib/libsampleloader* && \ + rm -f /ovms_release/lib/lib_node* && \ + rm -f /ovms_release/lib/libcustom_node* # Remove coverage libaries RUN if [ -f /ovms_release/lib/libjava.so ] ; then true ; else exit 0 ; fi ;cd /ovms_release/lib/ &&\ @@ -78,21 +81,24 @@ RUN if [ "$BASE_OS" == "redhat" ] ; then true ; else exit 0 ; fi ; cp /usr/lib64 RUN find /ovms/bazel-bin/src -name 'ovms' -type f -exec cp -v {} /ovms_release/bin \; WORKDIR /ovms_release/bin RUN patchelf --remove-rpath ./ovms && patchelf --set-rpath '$ORIGIN/../lib/' ./ovms -RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \; -RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \; +RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \; && \ + find 
/ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \; WORKDIR /ovms -RUN cp -v /ovms/release_files/LICENSE /ovms_release/ -RUN cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/ -RUN mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include +RUN cp -v /ovms/release_files/LICENSE /ovms_release/ && \ + cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/ && \ + mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include && \ + ls -lahR /ovms_release/ -RUN ls -lahR /ovms_release/ - -RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \; -RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \; +RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \; && \ + find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \; WORKDIR / -RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json -RUN tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json +RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && \ + sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && \ + cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json && \ + tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && \ + sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && \ + cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json ENTRYPOINT [ ] diff --git a/Makefile b/Makefile index 829e1a09b0..3ac306c64b 100644 --- a/Makefile +++ b/Makefile @@ -44,12 +44,18 @@ CHECK_COVERAGE ?=0 RUN_TESTS ?= 1 NVIDIA ?=0 BUILD_NGINX ?= 0 +MEDIAPIPE_DISABLE ?= 0 # NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand: # - uncomment source build section, comment binary section # - adjust binary version path - version variable is not passed to WORKSPACE file! 
-OV_SOURCE_BRANCH ?= master -OV_CONTRIB_BRANCH ?= master +OV_SOURCE_BRANCH ?= releases/2023/0 +OV_CONTRIB_BRANCH ?= releases/2023/0 + +OV_SOURCE_ORG ?= openvinotoolkit +OV_CONTRIB_ORG ?= openvinotoolkit + +SENTENCEPIECE ?= 1 OV_USE_BINARY ?= 1 APT_OV_PACKAGE ?= openvino-2022.1.0 @@ -58,10 +64,15 @@ BAZEL_BUILD_TYPE ?= opt CMAKE_BUILD_TYPE ?= Release MINITRACE ?= OFF +DISABLE_MEDIAPIPE_PARAMS ?= "" +ifeq ($(MEDIAPIPE_DISABLE),1) + DISABLE_MEDIAPIPE_PARAMS = " --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 " +endif + ifeq ($(BAZEL_BUILD_TYPE),dbg) - BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg " + BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg "$(DISABLE_MEDIAPIPE_PARAMS) else - BAZEL_DEBUG_FLAGS=" --strip=never " + BAZEL_DEBUG_FLAGS=" --strip=never "$(DISABLE_MEDIAPIPE_PARAMS) endif ifeq ($(MINITRACE),ON) @@ -79,18 +90,31 @@ ifeq ($(BASE_OS),ubuntu) BASE_OS_TAG=$(BASE_OS_TAG_UBUNTU) ifeq ($(NVIDIA),1) BASE_IMAGE=docker.io/nvidia/cuda:11.8.0-runtime-ubuntu20.04 + BASE_IMAGE_RELEASE=$(BASE_IMAGE) else BASE_IMAGE ?= ubuntu:$(BASE_OS_TAG_UBUNTU) + BASE_IMAGE_RELEASE=$(BASE_IMAGE) + endif + ifeq ($(BASE_OS_TAG_UBUNTU),20.04) + INSTALL_DRIVER_VERSION ?= "22.43.24595" + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu20_2023.0.0.10926.b4452d56304_x86_64.tgz + else ifeq ($(BASE_OS_TAG_UBUNTU),22.04) + INSTALL_DRIVER_VERSION ?= "23.13.26032" + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz endif - INSTALL_DRIVER_VERSION ?= "22.35.24055" - DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_ubuntu20_2022.3.0.9052.9752fafe8eb_x86_64.tgz endif ifeq ($(BASE_OS),redhat) BASE_OS_TAG=$(BASE_OS_TAG_REDHAT) - BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT) + ifeq ($(NVIDIA),1) + BASE_IMAGE=docker.io/nvidia/cuda:11.8.0-runtime-ubi8 + BASE_IMAGE_RELEASE=$(BASE_IMAGE) + else + BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT) + BASE_IMAGE_RELEASE=registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT) + endif DIST_OS=redhat - INSTALL_DRIVER_VERSION ?= "22.28.23726" - DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_rhel8_2022.3.0.9052.9752fafe8eb_x86_64.tgz + INSTALL_DRIVER_VERSION ?= "22.43.24595" + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_rhel8_2023.0.0.10926.b4452d56304_x86_64.tgz endif OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server @@ -104,7 +128,7 @@ ifeq ($(NVIDIA),1) endif PRODUCT_NAME = "OpenVINO Model Server" -PRODUCT_VERSION ?= "2022.3.0.1" +PRODUCT_VERSION ?= "2023.0.0" OVMS_CPP_CONTAINTER_NAME ?= server-test$(shell date +%Y-%m-%d-%H.%M.%S) OVMS_CPP_CONTAINTER_PORT ?= 9178 @@ -131,28 +155,22 @@ $(ACTIVATE): cppclean: venv @echo "Checking cppclean..." - @. $(ACTIVATE); bash -c "./cppclean.sh" + @. $(ACTIVATE); bash -c "./ci/cppclean.sh" style: venv clang-format-check cpplint cppclean -sdl-check: venv +hadolint: @echo "Checking SDL requirements..." @echo "Checking docker files..." @./tests/hadolint.sh +bandit: @echo "Checking python files..." - @. $(ACTIVATE); bash -c "bandit -x demos/benchmark/python -r demos/*/python > bandit.txt" - @if ! grep -FRq "No issues identified." 
bandit.txt; then\ - error Run bandit on demos/*/python/*.py to fix issues.;\ - fi - @rm bandit.txt - @. $(ACTIVATE); bash -c "bandit -r client/python/ > bandit2.txt" - @if ! grep -FRq "No issues identified." bandit2.txt; then\ - error Run bandit on client/python/ to fix issues.;\ - fi - @rm bandit2.txt + @. $(ACTIVATE); bash -c "./ci/bandit.sh" + +license-headers: @echo "Checking license headers in files..." - @. $(ACTIVATE); bash -c "python3 lib_search.py . > missing_headers.txt" + @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . > missing_headers.txt" @if ! grep -FRq "All files have headers" missing_headers.txt; then\ echo "Files with missing headers";\ cat missing_headers.txt;\ @@ -160,10 +178,12 @@ sdl-check: venv fi @rm missing_headers.txt +sdl-check: venv hadolint bandit license-headers + @echo "Checking forbidden functions in files..." - @. $(ACTIVATE); bash -c "python3 lib_search.py . functions > forbidden_functions.txt" + @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . functions > forbidden_functions.txt" @if ! grep -FRq "All files checked for forbidden functions" forbidden_functions.txt; then\ - error Run python3 lib_search.py . functions - to see forbidden functions file list.;\ + error Run python3 ./ci/lib_search.py . functions - to see forbidden functions file list.;\ fi @rm forbidden_functions.txt @@ -192,25 +212,39 @@ ifeq ($(NVIDIA),1) ifeq ($(OV_USE_BINARY),1) @echo "Building NVIDIA plugin requires OV built from source. To build NVIDIA plugin and OV from source make command should look like this 'NVIDIA=1 OV_USE_BINARY=0 make docker_build'"; exit 1 ; endif + ifeq ($(BASE_OS),redhat) + @echo "copying RH entitlements" + @cp -ru /etc/pki/entitlement . + @mkdir rhsm-ca + @cp -u /etc/rhsm/ca/* rhsm-ca/ + endif endif -ifeq ($(BUILD_CUSTOM_NODES),true) - @echo "Building custom nodes" - @cd src/custom_nodes && make BASE_OS=$(BASE_OS) +ifeq ($(BASE_OS),redhat) + @mkdir -p entitlement + @mkdir -p rhsm-ca endif - @echo "Building docker image $(BASE_OS)" - # Provide metadata information into image if defined - @mkdir -p .workspace - @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)' - @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})' - @bash -c '$(eval PROJECT_VERSION:=${PRODUCT_VERSION}.${PROJECT_VER_PATCH})' ifeq ($(NO_DOCKER_CACHE),true) $(eval NO_CACHE_OPTION:=--no-cache) @echo "Docker image will be rebuilt from scratch" @docker pull $(BASE_IMAGE) ifeq ($(BASE_OS),redhat) @docker pull registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT) + ifeq ($(NVIDIA),1) + @docker pull docker.io/nvidia/cuda:11.8.0-runtime-ubi8 + endif endif endif +ifeq ($(BUILD_CUSTOM_NODES),true) + @echo "Building custom nodes" + @cd src/custom_nodes && make NO_DOCKER_CACHE=$(NO_DOCKER_CACHE) BASE_OS=$(BASE_OS) BASE_IMAGE=$(BASE_IMAGE) + @cd src/custom_nodes/tokenizer && make NO_DOCKER_CACHE=$(NO_DOCKER_CACHE) BASE_OS=$(BASE_OS) BASE_IMAGE=$(BASE_IMAGE) +endif + @echo "Building docker image $(BASE_OS)" + # Provide metadata information into image if defined + @mkdir -p .workspace + @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)' + @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})' + @bash -c '$(eval PROJECT_VERSION:=${PRODUCT_VERSION}.${PROJECT_VER_PATCH})' ifneq ($(OVMS_METADATA_FILE),) @cp $(OVMS_METADATA_FILE) .workspace/metadata.json else @@ -219,8 +253,9 @@ endif @cat .workspace/metadata.json docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . 
\ --build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy=$(HTTPS_PROXY) --build-arg no_proxy=$(NO_PROXY) \ - --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" \ - --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \ + --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" --build-arg ov_source_org="$(OV_SOURCE_ORG)" \ + --build-arg ov_contrib_org="$(OV_CONTRIB_ORG)" \ + --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \ --build-arg APT_OV_PACKAGE=$(APT_OV_PACKAGE) --build-arg CHECK_COVERAGE=$(CHECK_COVERAGE) --build-arg RUN_TESTS=$(RUN_TESTS)\ --build-arg build_type=$(BAZEL_BUILD_TYPE) --build-arg debug_bazel_flags=$(BAZEL_DEBUG_FLAGS) \ --build-arg CMAKE_BUILD_TYPE=$(CMAKE_BUILD_TYPE) \ @@ -235,7 +270,7 @@ endif targz_package: ovms_builder_image docker build $(NO_CACHE_OPTION) -f DockerfileMakePackage . \ --build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy="$(HTTPS_PROXY)" \ - --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg BASE_OS=$(BASE_OS) \ + --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg BASE_OS=$(BASE_OS) \ --build-arg NVIDIA=$(NVIDIA) \ -t $(OVMS_CPP_DOCKER_IMAGE)-pkg:$(OVMS_CPP_IMAGE_TAG) \ --build-arg BUILD_IMAGE=$(OVMS_CPP_DOCKER_IMAGE)-build:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) @@ -253,7 +288,7 @@ ovms_release_image: targz_package --build-arg no_proxy=$(NO_PROXY) \ --build-arg INSTALL_RPMS_FROM_URL="$(INSTALL_RPMS_FROM_URL)" \ --build-arg GPU=0 \ - --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg BASE_IMAGE=$(BASE_IMAGE_RELEASE) \ --build-arg NVIDIA=$(NVIDIA) \ -t $(OVMS_CPP_DOCKER_IMAGE):$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) cd dist/$(DIST_OS)/ && docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . \ @@ -262,7 +297,7 @@ ovms_release_image: targz_package --build-arg INSTALL_RPMS_FROM_URL="$(INSTALL_RPMS_FROM_URL)" \ --build-arg INSTALL_DRIVER_VERSION="$(INSTALL_DRIVER_VERSION)" \ --build-arg GPU=1 \ - --build-arg BASE_IMAGE=$(BASE_IMAGE) \ + --build-arg BASE_IMAGE=$(BASE_IMAGE_RELEASE) \ --build-arg NVIDIA=$(NVIDIA) \ -t $(OVMS_CPP_DOCKER_IMAGE)-gpu:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) && \ docker tag $(OVMS_CPP_DOCKER_IMAGE)-gpu:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) $(OVMS_CPP_DOCKER_IMAGE):$(OVMS_CPP_IMAGE_TAG)-gpu$(IMAGE_TAG_SUFFIX) @@ -434,6 +469,7 @@ cpu_extension: --build-arg https_proxy=${https_proxy} \ --build-arg no_proxy=${no_proxy} \ --build-arg DLDT_PACKAGE_URL=${DLDT_PACKAGE_URL} \ - --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} . + --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} \ + --build-arg BASE_IMAGE=${BASE_IMAGE} . mkdir -p ./lib/${BASE_OS} docker cp $$(docker create --rm sample_cpu_extension:latest):/workspace/libcustom_relu_cpu_extension.so ./lib/${BASE_OS} diff --git a/MakefileCapi b/MakefileCapi index a21aefaa28..4265f665f7 100644 --- a/MakefileCapi +++ b/MakefileCapi @@ -13,13 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + cpp: - bazel build //src:ovms_shared + bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared g++ src/main_capi.cpp -I/ovms/src/ -L/ovms/bazel-bin/src/ -lovms_shared -fPIC --std=c++17 -o /ovms/bazel-bin/src/capi_cpp_example LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_cpp_example c: - bazel build //src:ovms_shared + bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared gcc -c src/main_capi.c -o /ovms/bazel-bin/src/main_capi.o -std=c99 gcc -o /ovms/bazel-bin/src/capi_c_example /ovms/bazel-bin/src/main_capi.o -lovms_shared -L/ovms/bazel-bin/src/ LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_c_example diff --git a/README.md b/README.md index eaa6a98ce2..4e09b059ce 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,40 @@ # OpenVINO™ Model Server -![OVMS picture](docs/ovms.png) +Model Server hosts models and makes them accessible to software components over standard network protocols: a client sends a request to the model server, which performs model inference and sends a response back to the client. Model Server offers many advantages for efficient model deployment: +- Remote inference enables using lightweight clients with only the necessary functions to perform API calls to edge or cloud deployments. +- Applications are independent of the model framework, hardware device, and infrastructure. +- Client applications in any programming language that supports REST or gRPC calls can be used to run inference remotely on the model server. +- Clients require fewer updates since client libraries change very rarely. +- Model topology and weights are not exposed directly to client applications, making it easier to control access to the model. +- Ideal architecture for microservices-based applications and deployments in cloud environments – including Kubernetes and OpenShift clusters. +- Efficient resource utilization with horizontal and vertical inference scaling. -OpenVINO™ Model Server (OVMS) is a high-performance system for serving machine learning models. It is based on C++ for high scalability -and optimized for Intel solutions, so that you can take advantage of all the power of the Intel® Xeon® processor or Intel’s AI accelerators -and expose it over a network interface. OVMS uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving), -while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making it easy to deploy new algorithms and AI experiments. +![OVMS diagram](docs/ovms_diagram.png) -Model repositories may reside on a locally accessible file system (e.g. NFS), as well as online storage compatible with -Google Cloud Storage (GCS), Amazon S3, or Azure Blob Storage. +OpenVINO™ Model Server (OVMS) is a high-performance system for serving models. Implemented in C++ for scalability and optimized for deployment on Intel architectures, the model server uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving) and [KServe](https://github.com/kserve/kserve) while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making deploying new algorithms and AI experiments easy. + +![OVMS picture](docs/ovms_high_level.png) + +The models used by the server need to be stored locally or hosted remotely by object storage services. For more details, refer to [Preparing Model Repository](docs/models_repository.md) documentation. 
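To make the serving flow described above concrete: a single-model deployment and a REST inference call can be as small as the snippet below. The image tag, model name, repository path, and request payload are placeholders following the TensorFlow Serving-compatible REST API, not anything introduced by this patch:

```bash
# Serve one model from a local repository (layout: /models/resnet/1/<model files>).
docker run -d --rm -p 9000:9000 -p 8000:8000 -v /models:/models \
    openvino/model_server:latest \
    --model_name resnet --model_path /models/resnet \
    --port 9000 --rest_port 8000

# REST inference request against the TFS-compatible API (payload shape is model-specific).
curl -X POST http://localhost:8000/v1/models/resnet:predict \
    -H "Content-Type: application/json" \
    -d '{"instances": [[0.0, 0.1, 0.2]]}'
```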
Model server works inside [Docker containers](docs/deploying_server.md), on [Bare Metal](docs/deploying_server.md), and in [Kubernetes environment](docs/deploying_server.md). +Start using OpenVINO Model Server with a fast-forward serving example from the [Quickstart guide](docs/ovms_quickstart.md) or explore [Model Server features](docs/features.md). Read [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what’s new. Key features: - support for multiple frameworks, such as Caffe, TensorFlow, MXNet, PaddlePaddle and ONNX -- online deployment of new [model versions](https://docs.openvino.ai/2022.2/ovms_docs_model_version_policy.html) -- [configuration updates in runtime](https://docs.openvino.ai/2022.2/ovms_docs_online_config_changes.html) +- online deployment of new [model versions](https://docs.openvino.ai/2023.0/ovms_docs_model_version_policy.html) +- [configuration updates in runtime](https://docs.openvino.ai/2023.0/ovms_docs_online_config_changes.html) - support for AI accelerators, such as -[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html), -[GPU](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html), and -[HDDL](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_HDDL.html) -- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html) -- [model reshaping](https://docs.openvino.ai/2022.2/ovms_docs_shape_batch_layout.html) in runtime -- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead -- [custom nodes in DAG pipelines](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library -- [serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests -- [binary format of the input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications -- [model caching](https://docs.openvino.ai/2022.2/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads -- [metrics](https://docs.openvino.ai/2022.2/ovms_docs_metrics.html) - metrics compatible with Prometheus standard +[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_MYRIAD.html), +[GPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html), and +- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html) +- [model reshaping](https://docs.openvino.ai/2023.0/ovms_docs_shape_batch_layout.html) in runtime +- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead +- [custom nodes in DAG pipelines](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library +- [serving stateful 
models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests +- [binary format of the input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications +- [model caching](https://docs.openvino.ai/2023.0/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads +- [metrics](https://docs.openvino.ai/2023.0/ovms_docs_metrics.html) - metrics compatible with Prometheus standard **Note:** OVMS has been tested on RedHat, and Ubuntu. The latest publicly released docker images are based on Ubuntu and UBI. @@ -38,26 +45,26 @@ They are stored in: ## Run OpenVINO Model Server -A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2022.2/ovms_docs_quick_start_guide.html). +A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2023.0/ovms_docs_quick_start_guide.html). For more information on using Model Server in various scenarios you can check the following guides: -* [Model repository configuration](https://docs.openvino.ai/2022.2/ovms_docs_models_repository.html) +* [Model repository configuration](https://docs.openvino.ai/2023.0/ovms_docs_models_repository.html) -* [Deployment options](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html) +* [Deployment options](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html) -* [Performance tuning](https://docs.openvino.ai/2022.2/ovms_docs_performance_tuning.html) +* [Performance tuning](https://docs.openvino.ai/2023.0/ovms_docs_performance_tuning.html) -* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html) +* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html) -* [Custom nodes development](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html) +* [Custom nodes development](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html) -* [Serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html) +* [Serving stateful models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html) * [Deploy using a Kubernetes Helm Chart](https://github.com/openvinotoolkit/operator/tree/main/helm-charts/ovms) * [Deployment using Kubernetes Operator](https://operatorhub.io/operator/ovms-operator) -* [Using binary input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html) +* [Using binary input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html) @@ -71,7 +78,7 @@ For more information on using Model Server in various scenarios you can check th * [RESTful API](https://restfulapi.net/) -* [Benchmarking results](https://docs.openvino.ai/2022.1/openvino_docs_performance_benchmarks_ovms.html) +* [Benchmarking results](https://docs.openvino.ai/2023.0/openvino_docs_performance_benchmarks_ovms.html) * [Speed and Scale AI Inference Operations Across Multiple Architectures](https://techdecoded.intel.io/essentials/speed-and-scale-ai-inference-operations-across-multiple-architectures/?elq_cid=3646480_ts1607680426276&erpm_id=6470692_ts1607680426276) - webinar recording diff --git a/WORKSPACE b/WORKSPACE index ce28c3b851..dcef650751 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -70,6 +70,109 @@ git_repository( # allow all http methods ) 
+########################################################### Mediapipe +http_archive( + name = "com_google_protobuf", + sha256 = "87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422", + strip_prefix = "protobuf-3.19.1", + urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.1.tar.gz"], + #patches = [ + # "@//third_party:com_google_protobuf_fixes.diff" + #], + #patch_args = [ + # "-p1", + #], +) + +################################### Official mediapipe repository ######### +#### Will be used on feature release +git_repository( + name = "mediapipe", + remote = "https://github.com/google/mediapipe", + tag = "v0.9.1", +) + +# DEV mediapipe 1 source - adjust local repository path for build +#local_repository( +# name = "mediapipe", +# path = "/mediapipe/", +#) + +# Protobuf for Node dependencies +http_archive( + name = "rules_proto_grpc", + sha256 = "bbe4db93499f5c9414926e46f9e35016999a4e9f6e3522482d3760dc61011070", + strip_prefix = "rules_proto_grpc-4.2.0", + urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/archive/4.2.0.tar.gz"], +) + +# Node dependencies +http_archive( + name = "build_bazel_rules_nodejs", + sha256 = "5aae76dced38f784b58d9776e4ab12278bc156a9ed2b1d9fcd3e39921dc88fda", + urls = ["https://github.com/bazelbuild/rules_nodejs/releases/download/5.7.1/rules_nodejs-5.7.1.tar.gz"], +) + +load("@build_bazel_rules_nodejs//:repositories.bzl", "build_bazel_rules_nodejs_dependencies") +build_bazel_rules_nodejs_dependencies() + +# fetches nodejs, npm, and yarn +load("@build_bazel_rules_nodejs//:index.bzl", "node_repositories", "yarn_install") +node_repositories() +yarn_install( + name = "npm", + package_json = "//:package.json", + yarn_lock = "//:yarn.lock", +) + +http_archive( + name = "com_google_protobuf_javascript", + sha256 = "35bca1729532b0a77280bf28ab5937438e3dcccd6b31a282d9ae84c896b6f6e3", + strip_prefix = "protobuf-javascript-3.21.2", + urls = ["https://github.com/protocolbuffers/protobuf-javascript/archive/refs/tags/v3.21.2.tar.gz"], +) + +http_archive( + name = "rules_foreign_cc", + strip_prefix = "rules_foreign_cc-0.1.0", + url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.1.0.zip", +) + +load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies") + +rules_foreign_cc_dependencies() + +# gflags needed by glog +http_archive( + name = "com_github_gflags_gflags", + strip_prefix = "gflags-2.2.2", + sha256 = "19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5", + url = "https://github.com/gflags/gflags/archive/v2.2.2.zip", +) + +git_repository( + name = "com_github_glog_glog", + remote = "https://github.com/google/glog", + tag = "v0.5.0", +) + +load("@mediapipe//third_party:external_files.bzl", "external_files") +external_files() + +new_local_repository( + name = "linux_openvino", + build_file = "@//third_party/openvino:BUILD", + path = "/opt/intel/openvino/runtime", +) + +new_local_repository( + name = "linux_opencv", + build_file = "@//third_party/opencv:BUILD", + path = "/opt/opencv/", +) + +########################################################### Mediapipe end + # minitrace new_git_repository( name = "minitrace", @@ -179,7 +282,7 @@ grpc_extra_deps() # cxxopts http_archive( - name = "cxxopts", + name = "com_github_jarro2783_cxxopts", url = "https://github.com/jarro2783/cxxopts/archive/v2.2.0.zip", sha256 = "f9640c00d9938bedb291a21f9287902a3a8cee38db6910b905f8eba4a6416204", strip_prefix = "cxxopts-2.2.0", @@ -188,7 +291,7 @@ http_archive( # RapidJSON http_archive( - name = "rapidjson", 
+ name = "com_github_tencent_rapidjson", url = "https://github.com/Tencent/rapidjson/archive/v1.1.0.zip", sha256 = "8e00c38829d6785a2dfb951bb87c6974fa07dfe488aa5b25deec4b8bc0f6a3ab", strip_prefix = "rapidjson-1.1.0", @@ -197,7 +300,7 @@ http_archive( # spdlog http_archive( - name = "spdlog", + name = "com_github_gabime_spdlog", url = "https://github.com/gabime/spdlog/archive/v1.4.0.tar.gz", sha256 = "afd18f62d1bc466c60bef088e6b637b0284be88c515cedc59ad4554150af6043", strip_prefix = "spdlog-1.4.0", @@ -248,3 +351,32 @@ new_local_repository( path = "/opt/opencv", ) ################## END OF OPENCV DEPENDENCY ########## + +new_git_repository( + name = "model_api", + remote = "https:///github.com/openvinotoolkit/model_api/", + build_file_content = """ +cc_library( + name = "adapter_api", + hdrs = ["model_api/cpp/adapters/include/adapters/inference_adapter.h",], + includes = ["model_api/cpp/adapters/include"], + deps = ["@linux_openvino//:openvino"], + visibility = ["//visibility:public"], +) + """, + commit = "7e163416c60ba9ccdf440c6c049d6c7e7137e144" +) + +git_repository( + name = "oneTBB", + branch = "v2021.8.0", + remote = "https://github.com/oneapi-src/oneTBB/", + patch_args = ["-p1"], + patches = ["mwaitpkg.patch",] +) + +new_local_repository( + name = "mediapipe_calculators", + build_file = "@//third_party/mediapipe_calculators:BUILD", + path = "/ovms/third_party/mediapipe_calculators", +) diff --git a/ci/Dockerfile.coverity b/ci/Dockerfile.coverity index a87e0a455e..10fddf10ec 100644 --- a/ci/Dockerfile.coverity +++ b/ci/Dockerfile.coverity @@ -30,7 +30,10 @@ RUN cd $(bazel info output_base)/external/build_bazel_rules_apple/ && patch -p1 WORKDIR /ovms/ RUN /cov/bin/cov-configure --gcc --config coverity_config.xml && \ /cov/bin/cov-configure --comptype gcc --compiler /usr/bin/gcc && \ - /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc); \ + /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc) && \ + cd /ovms/src/custom_nodes/tokenizer && \ + mkdir build && cd build && \ + cmake .. && make --jobs=$(nproc) && \ cd /example_cpp_client/cpp && \ bazel build --spawn_strategy=standalone //src:all' diff --git a/ci/bandit.sh b/ci/bandit.sh new file mode 100755 index 0000000000..14a5196f64 --- /dev/null +++ b/ci/bandit.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +BANDIT_RESULTS="bandit.txt" +function error_handle { + cat ${BANDIT_RESULTS} +} +function cleanup { + echo "Cleaning up bandit scan" + rm ${BANDIT_RESULTS} +} +trap error_handle ERR +trap cleanup EXIT +bandit -x demos/benchmark/python -r demos/*/python > ${BANDIT_RESULTS} +if ! grep -FRq "No issues identified." 
${BANDIT_RESULTS}; then + echo "Bandit scan failed for demos"; + exit 1; +fi +rm ${BANDIT_RESULTS} +bandit -r client/python/ > ${BANDIT_RESULTS} +if ! grep -FRq "No issues identified." ${BANDIT_RESULTS}; then + echo "Bandit scan failed for client"; + exit 2; +fi +exit 0 diff --git a/check_coverage.bat b/ci/check_coverage.bat similarity index 87% rename from check_coverage.bat rename to ci/check_coverage.bat index 2c41c30be8..c3fdea87e7 100755 --- a/check_coverage.bat +++ b/ci/check_coverage.bat @@ -1,12 +1,12 @@ #!/bin/bash #Ubuntu -#MIN_LINES_COV=76.3 -#MIN_FUNCTION_COV=88.3 +MIN_LINES_COV=76.8 +MIN_FUNCTION_COV=87.6 #Rhel -MIN_LINES_COV=74.8 -MIN_FUNCTION_COV=75.8 +MIN_LINES_COV=75.6 +MIN_FUNCTION_COV=75.4 LINES_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | head -n 1` FUNC_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | tail -n 1` diff --git a/cppclean.sh b/ci/cppclean.sh similarity index 58% rename from cppclean.sh rename to ci/cppclean.sh index e18ad3c07d..47b7834565 100755 --- a/cppclean.sh +++ b/ci/cppclean.sh @@ -16,37 +16,55 @@ # CPPCLEAN_RESULTS_FILE_SRC="cppclean_src" CPPCLEAN_RESULTS_FILE_TEST="cppclean_test" -cppclean ./src/ 2>&1 | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC}; -cppclean ./src/ 2>&1 | grep test > ${CPPCLEAN_RESULTS_FILE_TEST}; +cppclean ./src/ 2>&1 | grep -v "unable to find" | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC}; +cppclean ./src/ 2>&1 | grep -v "unable to find" | grep test > ${CPPCLEAN_RESULTS_FILE_TEST}; NO_WARNINGS=$(wc -l ${CPPCLEAN_RESULTS_FILE_SRC} | awk '{print $1}') -NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}') NO_WARNINGS_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l) NO_WARNINGS_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l) NO_WARNINGS_NOTUSED=$(grep " not used$" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l) +NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}') +NO_WARNINGS_TEST_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l) +NO_WARNINGS_TEST_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l) +NO_WARNINGS_TEST_NOTUSED=$(grep " not used$" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l) echo "Number of warnings:" ${NO_WARNINGS} echo "Number of warnings in tests:" ${NO_WARNINGS_TEST} echo "Number of warnings about not using forward delares:" ${NO_WARNINGS_FORWARD} echo "Number of warnings about not direct includes:" ${NO_WARNINGS_DIRECT} echo "Number of warnings about not used: " ${NO_WARNINGS_NOTUSED} +echo "Number of warnings in tests about not using forward delares:" ${NO_WARNINGS_TEST_FORWARD} +echo "Number of warnings in tests about not direct includes:" ${NO_WARNINGS_TEST_DIRECT} +echo "Number of warnings in tests about not used: " ${NO_WARNINGS_TEST_NOTUSED} trap "cat ${CPPCLEAN_RESULTS_FILE_SRC}" err exit -if [ ${NO_WARNINGS_FORWARD} -gt 6 ]; then +if [ ${NO_WARNINGS_FORWARD} -gt 7 ]; then echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_FORWARD}"; exit 1; fi -if [ ${NO_WARNINGS_DIRECT} -gt 14 ]; then +if [ ${NO_WARNINGS_DIRECT} -gt 13 ]; then echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_DIRECT}"; exit 1; fi -if [ ${NO_WARNINGS_NOTUSED} -gt 3 ]; then +if [ ${NO_WARNINGS_NOTUSED} -gt 4 ]; then echo "Failed probably due to unnecessary forward includes: 
${NO_WARNINGS_NOTUSED}"; exit 1; fi -if [ ${NO_WARNINGS} -gt 196 ]; then +if [ ${NO_WARNINGS_TEST_FORWARD} -gt 1 ]; then + echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_TEST_FORWARD}"; + exit 1; +fi +if [ ${NO_WARNINGS_TEST_DIRECT} -gt 12 ]; then + echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_TEST_DIRECT}"; + exit 1; +fi +if [ ${NO_WARNINGS_TEST_NOTUSED} -gt 0 ]; then + echo "Failed probably due to unnecessary forward includes: ${NO_WARNINGS_TEST_NOTUSED}"; + exit 1; +fi +if [ ${NO_WARNINGS} -gt 159 ]; then echo "Failed due to higher than allowed number of issues in code: ${NO_WARNINGS}" exit 1 fi -if [ ${NO_WARNINGS_TEST} -gt 128 ]; then +if [ ${NO_WARNINGS_TEST} -gt 55 ]; then echo "Failed due to higher than allowed number of issues in test code: ${NO_WARNINGS_TEST}" cat ${CPPCLEAN_RESULTS_FILE_TEST} exit 1 diff --git a/lib_search.py b/ci/lib_search.py similarity index 96% rename from lib_search.py rename to ci/lib_search.py index c6bc38ba8e..ab6bf6b24a 100644 --- a/lib_search.py +++ b/ci/lib_search.py @@ -90,6 +90,7 @@ def check_dir(start_dir): 'cppclean_test', 'docx', 'dummy.xml', + 'passthrough.xml', 'forbidden_functions.txt', 'gif', 'increment_1x3x4x5.xml', @@ -114,6 +115,11 @@ def check_dir(start_dir): 'bazel_rules_apple.patch', "pom.xml", "go.sum", + "mwaitpkg.patch", + 'saved_model.pb', + "yarn.lock", + "BUILD.bazel", + "package.json", ] exclude_directories = ['/dist/', 'extras/ovms-operator', 'extras/openvino-operator-openshift', 'release_files/thirdparty-licenses'] @@ -169,6 +175,7 @@ def check_func(start_dir): 'client_requirements.txt', 'docx', 'dummy.xml', + 'passthrough.xml', 'forbidden_functions.txt', 'input_images.txt', 'libevent/BUILD', @@ -189,6 +196,9 @@ def check_func(start_dir): 'tftext.patch', 'zlib.LICENSE.txt', 'bazel_rules_apple.patch', + 'yarn.lock', + 'BUILD.bazel', + 'package.json', ] exclude_directories = ['/dist/', 'extras/ovms-operator'] diff --git a/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp b/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp index 1a18c121a8..244591623d 100644 --- a/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp +++ b/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp @@ -62,7 +62,7 @@ namespace tc = triton::client; } \ } -std::vector load(const std::string& fileName) { +std::string load(const std::string& fileName) { std::ifstream file(fileName, std::ios::binary); file.unsetf(std::ios::skipws); std::streampos fileSize; @@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) { file.seekg(0, std::ios::end); fileSize = file.tellg(); file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); - std::vector vec; - vec.reserve(fileSize); - - vec.insert(vec.begin(), - std::istream_iterator(file), - std::istream_iterator()); - return vec; + return oss.str(); } + int main(int argc, char** argv) { cxxopts::Options opt("grpc_async_infer_resnet", "Sends requests via KServe gRPC API."); @@ -151,14 +148,19 @@ int main(int argc, char** argv) { std::vector shape{1}; - // Initialize the inputs with the data. 
- tc::InferInput* input; + std::vector inputs; + std::vector> input_ptrs; + for (int i = 0; i < imgs.size(); i++) { + tc::InferInput* input; + inputs.push_back(input); - FAIL_IF_ERR( - tc::InferInput::Create(&input, input_name, shape, "BYTES"), - "unable to get input"); - std::shared_ptr input_ptr; - input_ptr.reset(input); + FAIL_IF_ERR( + tc::InferInput::Create(&input, input_name, shape, "BYTES"), + "unable to get input"); + std::shared_ptr input_ptr; + input_ptr.reset(input); + input_ptrs.push_back(input_ptr); + } tc::InferOptions options(model_name); if (args.count("model_version")) @@ -169,7 +171,6 @@ int main(int argc, char** argv) { std::cerr << "The provided argument is of a wrong type" << std::endl; return 1; } - std::vector inputs = {input_ptr.get()}; std::vector classes; std::ifstream lb_f(args["labels_list"].as()); @@ -178,7 +179,7 @@ int main(int argc, char** argv) { classes.push_back(tmp); } - std::vector> input_data; + std::vector input_data; input_data.reserve(imgs.size()); for (int i = 0; i < imgs.size(); i++) { try { @@ -196,8 +197,9 @@ int main(int argc, char** argv) { int completedRequestCount = 0; auto start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < imgs.size(); i++) { + std::vector inputs = {input_ptrs[i].get()}; FAIL_IF_ERR( - input_ptr->AppendRaw(input_data[i]), + input_ptrs[i]->AppendFromString({input_data[i]}), "unable to set data for input"); client->AsyncInfer( [&, i](tc::InferResult* result) -> int { @@ -231,7 +233,6 @@ int main(int argc, char** argv) { return 0; }, options, inputs); - input->Reset(); } { std::unique_lock lk(mtx); diff --git a/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp b/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp index 0c5d7ad626..3d27afe17b 100644 --- a/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp +++ b/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp @@ -59,7 +59,7 @@ namespace tc = triton::client; } \ } -std::vector load(const std::string& fileName) { +std::string load(const std::string& fileName) { std::ifstream file(fileName, std::ios::binary); file.unsetf(std::ios::skipws); std::streampos fileSize; @@ -67,14 +67,10 @@ std::vector load(const std::string& fileName) { file.seekg(0, std::ios::end); fileSize = file.tellg(); file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); - std::vector vec; - vec.reserve(fileSize); - - vec.insert(vec.begin(), - std::istream_iterator(file), - std::istream_iterator()); - return vec; + return oss.str(); } int main(int argc, char** argv) { @@ -171,7 +167,7 @@ int main(int argc, char** argv) { std::vector results; results.resize(imgs.size()); for (int i = 0; i < imgs.size(); i++) { - std::vector input_data; + std::string input_data; try { input_data = load(imgs[i]); } @@ -180,7 +176,7 @@ int main(int argc, char** argv) { return 1; } FAIL_IF_ERR( - input_ptr->AppendRaw(input_data), + input_ptr->AppendFromString({input_data}), "unable to set data for input"); FAIL_IF_ERR( client->Infer(&(results[i]), options, inputs), diff --git a/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp index d9d0f61f0b..cafecf85ba 100644 --- a/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp +++ b/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp @@ -62,7 +62,7 @@ namespace tc = triton::client; } \ } -std::vector load(const std::string& fileName) { +std::string load(const std::string& fileName) { std::ifstream file(fileName, std::ios::binary); file.unsetf(std::ios::skipws); 
std::streampos fileSize; @@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) { file.seekg(0, std::ios::end); fileSize = file.tellg(); file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); - std::vector vec; - vec.reserve(fileSize); - - vec.insert(vec.begin(), - std::istream_iterator(file), - std::istream_iterator()); - return vec; + return oss.str(); } + int main(int argc, char** argv) { cxxopts::Options opt("http_async_infer_resnet", "Sends requests via KServe REST API."); @@ -151,14 +148,19 @@ int main(int argc, char** argv) { std::vector shape{1}; - // Initialize the inputs with the data. - tc::InferInput* input; + std::vector inputs; + std::vector> input_ptrs; + for (int i = 0; i < imgs.size(); i++) { + tc::InferInput* input; + inputs.push_back(input); - FAIL_IF_ERR( - tc::InferInput::Create(&input, input_name, shape, "BYTES"), - "unable to get input"); - std::shared_ptr input_ptr; - input_ptr.reset(input); + FAIL_IF_ERR( + tc::InferInput::Create(&input, input_name, shape, "BYTES"), + "unable to get input"); + std::shared_ptr input_ptr; + input_ptr.reset(input); + input_ptrs.push_back(input_ptr); + } tc::InferOptions options(model_name); if (args.count("model_version")) @@ -169,7 +171,6 @@ int main(int argc, char** argv) { std::cerr << "The provided argument is of a wrong type" << std::endl; return 1; } - std::vector inputs = {input_ptr.get()}; std::vector classes; std::ifstream lb_f(args["labels_list"].as()); @@ -187,7 +188,7 @@ int main(int argc, char** argv) { output_ptr.reset(output); std::vector outputs = {output_ptr.get()}; - std::vector> input_data; + std::vector input_data; input_data.reserve(imgs.size()); for (int i = 0; i < imgs.size(); i++) { try { @@ -204,8 +205,9 @@ int main(int argc, char** argv) { int completedRequestCount = 0; auto start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < imgs.size(); i++) { + std::vector inputs = {input_ptrs[i].get()}; FAIL_IF_ERR( - input_ptr->AppendRaw(input_data[i]), + input_ptrs[i]->AppendFromString({input_data[i]}), "unable to set data for input"); client->AsyncInfer( [&, i](tc::InferResult* result) -> int { @@ -239,7 +241,6 @@ int main(int argc, char** argv) { return 0; }, options, inputs, outputs); - input->Reset(); } { std::unique_lock lk(mtx); diff --git a/client/cpp/kserve-api/samples/http_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_infer_resnet.cpp index ad02374db3..ba21b0da4c 100644 --- a/client/cpp/kserve-api/samples/http_infer_resnet.cpp +++ b/client/cpp/kserve-api/samples/http_infer_resnet.cpp @@ -59,7 +59,7 @@ namespace tc = triton::client; } \ } -std::vector load(const std::string& fileName) { +std::string load(const std::string& fileName) { std::ifstream file(fileName, std::ios::binary); file.unsetf(std::ios::skipws); std::streampos fileSize; @@ -67,16 +67,13 @@ std::vector load(const std::string& fileName) { file.seekg(0, std::ios::end); fileSize = file.tellg(); file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); - std::vector vec; - vec.reserve(fileSize); - - vec.insert(vec.begin(), - std::istream_iterator(file), - std::istream_iterator()); - return vec; + return oss.str(); } + int main(int argc, char** argv) { cxxopts::Options opt("http_infer_resnet", "Sends requests via KServe REST API."); @@ -181,7 +178,7 @@ int main(int argc, char** argv) { std::vector results; results.resize(imgs.size()); for (int i = 0; i < imgs.size(); i++) { - std::vector input_data; + std::string input_data; try { input_data = load(imgs[i]); } @@ -190,7 
+187,7 @@ int main(int argc, char** argv) { return 1; } FAIL_IF_ERR( - input_ptr->AppendRaw(input_data), + input_ptr->AppendFromString({input_data}), "unable to set data for input"); FAIL_IF_ERR( client->Infer(&(results[i]), options, inputs, outputs), diff --git a/client/go/kserve-api/grpc_infer_resnet.go b/client/go/kserve-api/grpc_infer_resnet.go index 547b87a344..22af02b6ab 100644 --- a/client/go/kserve-api/grpc_infer_resnet.go +++ b/client/go/kserve-api/grpc_infer_resnet.go @@ -84,13 +84,20 @@ func ModelInferRequest(client grpc_client.GRPCInferenceServiceClient, fileName s ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() + bytes, err := ioutil.ReadFile(fileName) + contents := grpc_client.InferTensorContents{} + contents.BytesContents = append(contents.BytesContents, bytes) + + inferInput := grpc_client.ModelInferRequest_InferInputTensor{ + Name: "0", + Datatype: "BYTES", + Shape: []int64{1}, + Contents: &contents, + } + // Create request input tensors inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{ - &grpc_client.ModelInferRequest_InferInputTensor{ - Name: "0", - Datatype: "BYTES", - Shape: []int64{1}, - }, + &inferInput, } // Create inference request for specific model/version @@ -100,11 +107,6 @@ func ModelInferRequest(client grpc_client.GRPCInferenceServiceClient, fileName s Inputs: inferInputs, } - bytes, err := ioutil.ReadFile(fileName) - - - modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes) - // Submit inference request to server modelInferResponse, err := client.ModelInfer(ctx, &modelInferRequest) if err != nil { diff --git a/client/java/kserve-api/README.md b/client/java/kserve-api/README.md index 647d6983f8..86f8184eb2 100644 --- a/client/java/kserve-api/README.md +++ b/client/java/kserve-api/README.md @@ -225,7 +225,7 @@ usage: grpc_infer_resnet [OPTION]... 
- Usage Example ```Bash -java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./common/resnet_input_images.txt -lbs ../../../common/resnet_labels.txt --grpc_port 9000 +java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./resnet_input_images.txt -lbs ../../common/resnet_labels.txt --grpc_port 9000 ../../../demos/common/static/images/airliner.jpeg classified as 404 airliner ../../../demos/common/static/images/arctic-fox.jpeg classified as 279 Arctic fox, white fox, Alopex lagopus ../../../demos/common/static/images/bee.jpeg classified as 309 bee diff --git a/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java b/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java index 8b65923e45..be2e1b2b52 100644 --- a/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java +++ b/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java @@ -34,6 +34,7 @@ import com.google.protobuf.ByteString; +import inference.GrpcPredictV2.InferTensorContents; import inference.GRPCInferenceServiceGrpc; import inference.GRPCInferenceServiceGrpc.GRPCInferenceServiceBlockingStub; import inference.GrpcPredictV2.ModelInferRequest; @@ -102,8 +103,6 @@ public static void main(String[] args) { input.setDatatype("BYTES"); input.addShape(1); - request.addInputs(0, input); - List labels = new ArrayList<>(); try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("lbs"))) { try (Scanner sc = new Scanner(fis)) { @@ -118,13 +117,15 @@ public static void main(String[] args) { try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("imgs"))) { try (Scanner sc = new Scanner(fis)) { while (sc.hasNext()) { - + request.clearInputs(); String[] line = sc.nextLine().split(" "); String fileName = line[0]; int label = Integer.parseInt(line[1]); FileInputStream imageStream = new FileInputStream(fileName); - request.clearRawInputContents(); - request.addRawInputContents(ByteString.readFrom(imageStream)); + InferTensorContents.Builder input_data = InferTensorContents.newBuilder(); + input_data.addBytesContents(ByteString.readFrom(imageStream)); + input.setContents(input_data); + request.addInputs(0, input); ModelInferResponse response = grpc_stub.modelInfer(request.build()); diff --git a/client/python/kserve-api/samples/README.md b/client/python/kserve-api/samples/README.md index 207d4d6e12..bc2ad7916e 100644 --- a/client/python/kserve-api/samples/README.md +++ b/client/python/kserve-api/samples/README.md @@ -329,9 +329,8 @@ Classification accuracy: 100.00 ```Bash python3 grpc_infer_binary_resnet.py --help -usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--grpc_address GRPC_ADDRESS] - [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE] - [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] +usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--grpc_address GRPC_ADDRESS] [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE] + [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls] Sends requests via KServe gRPC API using images in format supported by OpenCV. 
It displays performance statistics and optionally the model accuracy @@ -339,8 +338,6 @@ optional arguments: -h, --help show this help message and exit --images_list IMAGES_LIST path to a file with a list of labeled images - --labels_numpy_path LABELS_NUMPY_PATH - numpy in shape [n,1] - can be used to check model accuracy --grpc_address GRPC_ADDRESS Specify url to grpc service. default:localhost --grpc_port GRPC_PORT @@ -355,12 +352,13 @@ optional arguments: Define model name, must be same as is in service. default: resnet --pipeline_name PIPELINE_NAME Define pipeline name, must be same as is in service + --tls use TLS communication with GRPC endpoint ``` - Usage Example ```Bash -python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet +python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet Start processing: Model name: resnet Iteration 0; Processing time: 27.09 ms; speed 36.92 fps @@ -794,9 +792,8 @@ Classification accuracy: 100.00 ```Bash python3 ./http_infer_binary_resnet.py --help -usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--http_address HTTP_ADDRESS] - [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE] - [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] +usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--http_address HTTP_ADDRESS] [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE] + [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls] [--server_cert SERVER_CERT] [--client_cert CLIENT_CERT] [--client_key CLIENT_KEY] Sends requests via KServe REST API using binary encoded images. It displays performance statistics and optionally the model accuracy @@ -804,8 +801,6 @@ optional arguments: -h, --help show this help message and exit --images_list IMAGES_LIST path to a file with a list of labeled images - --labels_numpy_path LABELS_NUMPY_PATH - numpy in shape [n,1] - can be used to check model accuracy --http_address HTTP_ADDRESS Specify url to http service. default:localhost --http_port HTTP_PORT @@ -820,12 +815,19 @@ optional arguments: Define model name, must be same as is in service. 
default: resnet --pipeline_name PIPELINE_NAME Define pipeline name, must be same as is in service + --tls use TLS communication with HTTP endpoint + --server_cert SERVER_CERT + Path to server certificate + --client_cert CLIENT_CERT + Path to client certificate + --client_key CLIENT_KEY + Path to client key ``` - Usage Example ```Bash -python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet +python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet Start processing: Model name: resnet Iteration 0; Processing time: 38.61 ms; speed 25.90 fps diff --git a/client/python/kserve-api/samples/grpc_infer_binary_resnet.py b/client/python/kserve-api/samples/grpc_infer_binary_resnet.py index 85a3948119..5346ac94d4 100644 --- a/client/python/kserve-api/samples/grpc_infer_binary_resnet.py +++ b/client/python/kserve-api/samples/grpc_infer_binary_resnet.py @@ -17,59 +17,19 @@ import sys sys.path.append("../../../../demos/common/python") -import ast -import grpc import numpy as np import classes import datetime import argparse from client_utils import print_statistics -from tritonclient.grpc import service_pb2, service_pb2_grpc -from tritonclient.utils import * - -DataTypeToContentsFieldName = { - 'BOOL' : 'bool_contents', - 'BYTES' : 'bytes_contents', - 'FP32' : 'fp32_contents', - 'FP64' : 'fp64_contents', - 'INT64' : 'int64_contents', - 'INT32' : 'int_contents', - 'UINT64' : 'uint64_contents', - 'UINT32' : 'uint_contents', - 'INT64' : 'int64_contents', - 'INT32' : 'int_contents', -} - -def as_numpy(response, name): - index = 0 - for output in response.outputs: - if output.name == name: - shape = [] - for value in output.shape: - shape.append(value) - datatype = output.datatype - field_name = DataTypeToContentsFieldName[datatype] - contents = getattr(output, "contents") - contents = getattr(contents, f"{field_name}") - if index < len(response.raw_output_contents): - np_array = np.frombuffer( - response.raw_output_contents[index], dtype=triton_to_np_dtype(output.datatype)) - elif len(contents) != 0: - np_array = np.array(contents, - copy=False) - else: - np_array = np.empty(0) - np_array = np_array.reshape(shape) - return np_array - else: - index += 1 - return None + +import tritonclient.grpc as grpclient + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Sends requests via KServe gRPC API using images in format supported by OpenCV. ' 'It displays performance statistics and optionally the model accuracy') parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images') - parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy') parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost') parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000') parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. 
default: input') @@ -82,10 +42,14 @@ def as_numpy(response, name): dest='model_name') parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service', dest='pipeline_name') + parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with GRPC endpoint') + error = False args = vars(parser.parse_args()) address = "{}:{}".format(args['grpc_address'],args['grpc_port']) + input_name = args['input_name'] + output_name = args['output_name'] processing_times = np.zeros((0),int) @@ -96,10 +60,6 @@ def as_numpy(response, name): while batch_size > len(lines): lines += lines - if args.get('labels_numpy_path') is not None: - lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False) - matched_count = 0 - total_executed = 0 batch_size = int(args.get('batchsize')) print('Start processing:') @@ -108,76 +68,79 @@ def as_numpy(response, name): iteration = 0 is_pipeline_request = bool(args.get('pipeline_name')) - # Create gRPC stub for communicating with the server - channel = grpc.insecure_channel(address) - grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel) + model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name') + + try: + triton_client = grpclient.InferenceServerClient( + url=address, + ssl=args['tls'], + verbose=False) + except Exception as e: + print("context creation failed: " + str(e)) + sys.exit() + + processing_times = np.zeros((0),int) + total_executed = 0 + matched_count = 0 batch_i = 0 image_data = [] + image_binary_size = [] labels = [] for line in lines: + inputs = [] batch_i += 1 path, label = line.strip().split(" ") with open(path, 'rb') as f: image_data.append(f.read()) - labels.append(label) + labels.append(int(label)) if batch_i < batch_size: continue - inputs = [] - inputs.append(service_pb2.ModelInferRequest().InferInputTensor()) - inputs[0].name = args['input_name'] - inputs[0].datatype = "BYTES" - inputs[0].shape.extend([1]) - inputs[0].contents.bytes_contents.append(image_data[0]) - + inputs.append(grpclient.InferInput(args['input_name'], [batch_i], "BYTES")) outputs = [] - outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor()) - outputs[0].name = "prob" - - request = service_pb2.ModelInferRequest() - request.model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name') - request.inputs.extend(inputs) - + outputs.append(grpclient.InferRequestedOutput(output_name)) + + nmpy = np.array(image_data , dtype=np.object_) + inputs[0].set_data_from_numpy(nmpy) start_time = datetime.datetime.now() - request.outputs.extend(outputs) - response = grpc_stub.ModelInfer(request) + results = triton_client.infer(model_name=model_name, + inputs=inputs, + outputs=outputs) end_time = datetime.datetime.now() - duration = (end_time - start_time).total_seconds() * 1000 processing_times = np.append(processing_times,np.array([int(duration)])) - output = as_numpy(response, args['output_name']) + output = results.as_numpy(output_name) nu = np.array(output) # for object classification models show imagenet class print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2), - round(1000 * batch_size / np.average(duration), 2) - )) + round(1000 * batch_size / np.average(duration), 2) + )) # Comment out this section for non imagenet datasets print("imagenet top results in a single batch:") for i in range(nu.shape[0]): - lbs_i = iteration * batch_size - single_result = 
nu[[i],...] - offset = 0 - if nu.shape[1] == 1001: - offset = 1 - ma = np.argmax(single_result) - offset + if is_pipeline_request: + # shape (1,) + print("response shape", output.shape) + ma = nu[0] - 1 # indexes need to be shifted left due to 1x1001 shape + else: + # shape (1,1000) + single_result = nu[[i],...] + offset = 0 + if nu.shape[1] == 1001: + offset = 1 + ma = np.argmax(single_result) - offset mark_message = "" - if args.get('labels_numpy_path') is not None: - total_executed += 1 - if ma == lbs[lbs_i + i]: - matched_count += 1 - mark_message = "; Correct match." - else: - mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] ) - print("\t",i, classes.imagenet_classes[ma],ma, mark_message) - + total_executed += 1 + if ma == labels[i]: + matched_count += 1 + mark_message = "; Correct match." + else: + mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]]) + print("\t", i, classes.imagenet_classes[ma], ma, mark_message) # Comment out this section for non imagenet datasets - iteration += 1 - image_data = [] labels = [] + image_data = [] batch_i = 0 print_statistics(processing_times, batch_size) - - if args.get('labels_numpy_path') is not None: - print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed)) - + print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed)) diff --git a/client/python/kserve-api/samples/http_infer_binary_resnet.py b/client/python/kserve-api/samples/http_infer_binary_resnet.py index 4e1a213b42..b3fb25f700 100644 --- a/client/python/kserve-api/samples/http_infer_binary_resnet.py +++ b/client/python/kserve-api/samples/http_infer_binary_resnet.py @@ -22,15 +22,14 @@ import datetime import argparse from client_utils import print_statistics -import requests -import json + +import tritonclient.http as httpclient if __name__ == '__main__': parser = argparse.ArgumentParser(description='Sends requests via KServe REST API using binary encoded images. ' 'It displays performance statistics and optionally the model accuracy') parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images') - parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy') parser.add_argument('--http_address',required=False, default='localhost', help='Specify url to http service. default:localhost') parser.add_argument('--http_port',required=False, default=8000, help='Specify port to http service. default: 8000') parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. 
default: input') @@ -43,6 +42,10 @@ dest='model_name') parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service', dest='pipeline_name') + parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with HTTP endpoint') + parser.add_argument('--server_cert', required=False, help='Path to server certificate', default=None) + parser.add_argument('--client_cert', required=False, help='Path to client certificate', default=None) + parser.add_argument('--client_key', required=False, help='Path to client key', default=None) error = False args = vars(parser.parse_args()) @@ -51,6 +54,15 @@ input_name = args['input_name'] output_name = args['output_name'] + if args['tls']: + ssl_options = { + 'keyfile':args['client_key'], + 'cert_file':args['client_cert'], + 'ca_certs':args['server_cert'] + } + else: + ssl_options = None + processing_times = np.zeros((0),int) input_images = args.get('images_list') @@ -60,10 +72,6 @@ while batch_size > len(lines): lines += lines - if args.get('labels_numpy_path') is not None: - lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False) - matched_count = 0 - total_executed = 0 batch_size = int(args.get('batchsize')) print('Start processing:') @@ -74,72 +82,79 @@ model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name') - url = f"http://{address}/v2/models/{model_name}/infer" - http_session = requests.session() + try: + triton_client = httpclient.InferenceServerClient( + url=address, + ssl=args['tls'], + ssl_options=ssl_options, + verbose=False) + except Exception as e: + print("context creation failed: " + str(e)) + sys.exit() + processing_times = np.zeros((0),int) + + total_executed = 0 + matched_count = 0 batch_i = 0 image_data = [] image_binary_size = [] labels = [] for line in lines: + inputs = [] batch_i += 1 path, label = line.strip().split(" ") with open(path, 'rb') as f: image_data.append(f.read()) - image_binary_size.append(len(image_data[-1])) - labels.append(label) + labels.append(int(label)) if batch_i < batch_size: continue - image_binary_size_str = ",".join(map(str, image_binary_size)) - inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]} - inference_header_binary = json.dumps(inference_header).encode() - + inputs.append(httpclient.InferInput(args['input_name'], [batch_i], "BYTES")) + outputs = [] + outputs.append(httpclient.InferRequestedOutput(output_name, binary_data=True)) + + nmpy = np.array(image_data , dtype=np.object_) + inputs[0].set_data_from_numpy(nmpy) start_time = datetime.datetime.now() - results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))}) + results = triton_client.infer(model_name=model_name, + inputs=inputs, + outputs=outputs) end_time = datetime.datetime.now() duration = (end_time - start_time).total_seconds() * 1000 processing_times = np.append(processing_times,np.array([int(duration)])) - results_dict = json.loads(results.text) - if "error" in results_dict.keys(): - print(f"Error: {results_dict['error']}") - error = True - break - - output = np.array(json.loads(results.text)['outputs'][0]['data']) - output_shape = tuple(results_dict['outputs'][0]['shape']) - nu = np.reshape(output, output_shape) + output = results.as_numpy(output_name) + nu = np.array(output) # for object classification models show imagenet 
class print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2), - round(1000 * batch_size / np.average(duration), 2) - )) + round(1000 * batch_size / np.average(duration), 2) + )) # Comment out this section for non imagenet datasets print("imagenet top results in a single batch:") for i in range(nu.shape[0]): - lbs_i = iteration * batch_size - single_result = nu[[i],...] - offset = 0 - if nu.shape[1] == 1001: - offset = 1 - ma = np.argmax(single_result) - offset + if is_pipeline_request: + # shape (1,) + print("response shape", output.shape) + ma = nu[0] - 1 # indexes need to be shifted left due to 1x1001 shape + else: + # shape (1,1000) + single_result = nu[[i],...] + offset = 0 + if nu.shape[1] == 1001: + offset = 1 + ma = np.argmax(single_result) - offset mark_message = "" - if args.get('labels_numpy_path') is not None: - total_executed += 1 - if ma == lbs[lbs_i + i]: - matched_count += 1 - mark_message = "; Correct match." - else: - mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] ) - print("\t",i, classes.imagenet_classes[ma],ma, mark_message) + total_executed += 1 + if ma == labels[i]: + matched_count += 1 + mark_message = "; Correct match." + else: + mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]]) + print("\t", i, classes.imagenet_classes[ma], ma, mark_message) # Comment out this section for non imagenet datasets - iteration += 1 - image_data = [] - image_binary_size = [] labels = [] + image_data = [] batch_i = 0 - if not error: - print_statistics(processing_times, batch_size) - - if args.get('labels_numpy_path') is not None: - print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed)) - + print_statistics(processing_times, batch_size) + print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed)) + \ No newline at end of file diff --git a/client/python/kserve-api/samples/requirements.txt b/client/python/kserve-api/samples/requirements.txt index 42e7cf9c8f..222b7adf1b 100644 --- a/client/python/kserve-api/samples/requirements.txt +++ b/client/python/kserve-api/samples/requirements.txt @@ -1,4 +1,4 @@ protobuf==3.19.5 tritonclient[all]==2.22.3 -requests==2.27.1 +requests==2.31.0 grpcio diff --git a/client/python/ovmsclient/lib/Makefile b/client/python/ovmsclient/lib/Makefile index 6c87be0844..6da873c155 100644 --- a/client/python/ovmsclient/lib/Makefile +++ b/client/python/ovmsclient/lib/Makefile @@ -18,7 +18,7 @@ VIRTUALENV_EXE := python3 -m virtualenv -p python3 VIRTUALENV_DIR := .venv-ovmsclient ACTIVATE="$(VIRTUALENV_DIR)/bin/activate" -PACKAGE_PATH := dist/ovmsclient-2022.3-py3-none-any.whl +PACKAGE_PATH := dist/ovmsclient-2023.0-py3-none-any.whl TEST_TYPE := BASIC .PHONY: build-deps build-package build test clean style diff --git a/client/python/ovmsclient/lib/README.md b/client/python/ovmsclient/lib/README.md index d1f87dbc68..7e09b663dd 100644 --- a/client/python/ovmsclient/lib/README.md +++ b/client/python/ovmsclient/lib/README.md @@ -44,7 +44,7 @@ Assuming you have TFS API built, you can use `make build-package` target to buil **To install the package run:** ```bash -pip3 install --force-reinstall --no-deps dist/ovmsclient-2022.3-py3-none-any.whl +pip3 install --force-reinstall --no-deps dist/ovmsclient-2023.0-py3-none-any.whl ``` *Note*: For development purposes you may want to repeatedly reinstall the package. 
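After installing the rebuilt 2023.0 wheel, a quick smoke test against a running server confirms that the package and the endpoint are wired up. This is a minimal sketch only, assuming OVMS serves a model named `model` over gRPC on port 9000 (the same placeholder names used in the client docs below) and that the package's status and predict calls are available:

```python
import ovmsclient

# assumes OVMS is reachable locally with gRPC on port 9000
client = ovmsclient.make_grpc_client("localhost:9000")

# check that at least one version of the model is in AVAILABLE state
print(client.get_model_status(model_name="model"))

# numeric input, mirroring the docs examples below
results = client.predict(inputs={"input": [1, 2, 3]}, model_name="model")
print(results)
```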
@@ -61,7 +61,7 @@ make build-package ``` - `make test` - runs tests on `ovmsclient` package. By default the package located in `dist/` directory is used. To specify custom package path pass `PACKAGE_PATH` option like: - `make test PACKAGE_PATH=/opt/packages/ovmsclient-2022.3-py3-none-any.whl` + `make test PACKAGE_PATH=/opt/packages/ovmsclient-2023.0-py3-none-any.whl` ```bash make test ``` diff --git a/client/python/ovmsclient/lib/docs/grpc_client.md b/client/python/ovmsclient/lib/docs/grpc_client.md index ba8b5f619d..1ce25a3cc1 100644 --- a/client/python/ovmsclient/lib/docs/grpc_client.md +++ b/client/python/ovmsclient/lib/docs/grpc_client.md @@ -182,7 +182,7 @@ Request prediction on provided inputs. ... } ``` - Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray` + Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as array of `numpy.bytes_` dtype. **Raises:** @@ -202,12 +202,21 @@ Request prediction on provided inputs. ```python import ovmsclient client = ovmsclient.make_grpc_client("localhost:9000") + +# Numeric input inputs = {"input": [1, 2, 3]} # request prediction on specific model version, with timeout set to 2.5 seconds results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5) # request prediction on the latest model version results = client.predict(inputs=inputs, model_name="model") +# String input +inputs = {"input": ["We have a really nice", "One, two, three,"]} +results = client.predict(inputs=inputs, model_name="model") +print(inputs["input"]) +# ['We have a really nice', 'One, two, three,'] +print(results) +# [b'We have a really nice way' b'One, two, three, four'] ``` --- diff --git a/client/python/ovmsclient/lib/docs/http_client.md b/client/python/ovmsclient/lib/docs/http_client.md index 731afd3742..15d7f64259 100644 --- a/client/python/ovmsclient/lib/docs/http_client.md +++ b/client/python/ovmsclient/lib/docs/http_client.md @@ -180,7 +180,7 @@ Request prediction on provided inputs. ... } ``` - Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray` + Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as array of `numpy.bytes_` dtype. **Raises:** @@ -200,12 +200,21 @@ Request prediction on provided inputs. 
```python import ovmsclient client = ovmsclient.make_http_client("localhost:9000") + +# Numeric input inputs = {"input": [1, 2, 3]} # request prediction on specific model version, with timeout set to 2.5 seconds results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5) # request prediction on the latest model version results = client.predict(inputs=inputs, model_name="model") +# String input +inputs = {"input": ["We have a really nice", "One, two, three,"]} +results = client.predict(inputs=inputs, model_name="model") +print(inputs["input"]) +# ['We have a really nice', 'One, two, three,'] +print(results) +# [b'We have a really nice way' b'One, two, three, four'] ``` --- diff --git a/client/python/ovmsclient/lib/docs/pypi_overview.md b/client/python/ovmsclient/lib/docs/pypi_overview.md index 0578151125..232440a0ba 100644 --- a/client/python/ovmsclient/lib/docs/pypi_overview.md +++ b/client/python/ovmsclient/lib/docs/pypi_overview.md @@ -9,7 +9,7 @@ The `ovmsclient` package works both with OpenVINO™ Model Server and Tensor The `ovmsclient` can replace `tensorflow-serving-api` package with reduced footprint and simplified interface. -See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2022/3/client/python/ovmsclient/lib/docs/README.md) for usage details. +See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/client/python/ovmsclient/lib/docs/README.md) for usage details. ## Usage example @@ -38,4 +38,4 @@ results = client.predict(inputs=inputs, model_name="model") ``` -Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2022/3/client/python/ovmsclient/lib). \ No newline at end of file +Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2023/0/client/python/ovmsclient/lib). 
\ No newline at end of file diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py index e1549a7ad4..b955be3161 100644 --- a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py +++ b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py @@ -24,32 +24,51 @@ class TensorType(NamedTuple): + NumpyPrimaryType: np.dtype.type TensorDtype: str TensorProtoField: str NP_TO_TENSOR_MAP = { - np.float16: TensorType(TensorDtype=DataType.DT_HALF, TensorProtoField="half_val"), - np.float32: TensorType(TensorDtype=DataType.DT_FLOAT, TensorProtoField="float_val"), - np.float64: TensorType(TensorDtype=DataType.DT_DOUBLE, TensorProtoField="double_val"), - np.int8: TensorType(TensorDtype=DataType.DT_INT8, TensorProtoField="int_val"), - np.int16: TensorType(TensorDtype=DataType.DT_INT16, TensorProtoField="int_val"), - np.int32: TensorType(TensorDtype=DataType.DT_INT32, TensorProtoField="int_val"), - np.int64: TensorType(TensorDtype=DataType.DT_INT64, TensorProtoField="int64_val"), - np.uint8: TensorType(TensorDtype=DataType.DT_UINT8, TensorProtoField="int_val"), - np.uint16: TensorType(TensorDtype=DataType.DT_UINT16, TensorProtoField="int_val"), - np.uint32: TensorType(TensorDtype=DataType.DT_UINT32, TensorProtoField="uint32_val"), - np.uint64: TensorType(TensorDtype=DataType.DT_UINT64, TensorProtoField="uint64_val"), - np.complex64: TensorType(TensorDtype=DataType.DT_COMPLEX64, TensorProtoField="scomplex_val"), - np.complex128: TensorType(TensorDtype=DataType.DT_COMPLEX128, TensorProtoField="dcomplex_val"), + np.float16: TensorType(NumpyPrimaryType=np.float16, TensorDtype=DataType.DT_HALF, + TensorProtoField="half_val"), + np.float32: TensorType(NumpyPrimaryType=np.float32, TensorDtype=DataType.DT_FLOAT, + TensorProtoField="float_val"), + np.float64: TensorType(NumpyPrimaryType=np.float64, TensorDtype=DataType.DT_DOUBLE, + TensorProtoField="double_val"), + np.int8: TensorType(NumpyPrimaryType=np.int8, TensorDtype=DataType.DT_INT8, + TensorProtoField="int_val"), + np.int16: TensorType(NumpyPrimaryType=np.int16, TensorDtype=DataType.DT_INT16, + TensorProtoField="int_val"), + np.int32: TensorType(NumpyPrimaryType=np.int32, TensorDtype=DataType.DT_INT32, + TensorProtoField="int_val"), + np.int64: TensorType(NumpyPrimaryType=np.int64, TensorDtype=DataType.DT_INT64, + TensorProtoField="int64_val"), + np.uint8: TensorType(NumpyPrimaryType=np.uint8, TensorDtype=DataType.DT_UINT8, + TensorProtoField="int_val"), + np.uint16: TensorType(NumpyPrimaryType=np.uint16, TensorDtype=DataType.DT_UINT16, + TensorProtoField="int_val"), + np.uint32: TensorType(NumpyPrimaryType=np.uint32, TensorDtype=DataType.DT_UINT32, + TensorProtoField="uint32_val"), + np.uint64: TensorType(NumpyPrimaryType=np.uint64, TensorDtype=DataType.DT_UINT64, + TensorProtoField="uint64_val"), + np.complex64: TensorType(NumpyPrimaryType=np.complex64, TensorDtype=DataType.DT_COMPLEX64, + TensorProtoField="scomplex_val"), + np.complex128: TensorType(NumpyPrimaryType=np.complex128, TensorDtype=DataType.DT_COMPLEX128, + TensorProtoField="dcomplex_val"), # Standard Python bool and np.bool_ replace deprecated np.bool type - np.bool_: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"), - bool: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"), - np.bytes_: TensorType(TensorDtype=DataType.DT_STRING, TensorProtoField="string_val") + bool: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL, + 
TensorProtoField="bool_val"), + np.bool_: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL, + TensorProtoField="bool_val"), + np.str_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING, + TensorProtoField="string_val"), + np.bytes_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING, + TensorProtoField="string_val"), } -TENSOR_TO_NP_MAP = {v.TensorDtype: k for k, v in NP_TO_TENSOR_MAP.items()} +TENSOR_TO_NP_MAP = {v.TensorDtype: v.NumpyPrimaryType for v in NP_TO_TENSOR_MAP.values()} TENSOR_DTYPE_TO_PROTOFIELD = {v.TensorDtype: v.TensorProtoField for v in NP_TO_TENSOR_MAP.values()} @@ -79,8 +98,12 @@ def _cast_bytes_to_dtype(values, dtype): raise ValueError(f'could not cast bytes to {dtype}. {e_info}') -def _is_bytes_shape_valid(inferred_shape, tensor_values): - return (len(inferred_shape) > 1 or (len(tensor_values.shape) > 1 and inferred_shape == [])) +# Functioned used with numpy.vectorize() +def _encode_string_to_bytes(tensor_value, encoding="UTF-8"): + return tensor_value.encode(encoding) + + +_encode_strings_to_bytes = np.vectorize(_encode_string_to_bytes) def _check_if_array_homogeneous(tensor_values): @@ -185,13 +208,18 @@ def make_tensor_proto(values, dtype=None, shape=None): else: tensor_values = _cast_ndarray_to_dtype(tensor_values, np_dtype) - if dtype == DataType.DT_STRING and _is_bytes_shape_valid(inferred_shape, tensor_values): - raise ValueError("bytes values with dtype DT_STRING must be in shape [N]") - elif inferred_shape == []: + if inferred_shape == []: inferred_shape = list(tensor_values.shape) elif inferred_shape != list(tensor_values.shape): tensor_values = tensor_values.reshape(inferred_shape) + # For strings or binary image inputs flatten array to 1-D + if dtype == DataType.DT_STRING: + tensor_values = np.ravel(tensor_values) + # Encode strings + if tensor_values.dtype.type == np.str_: + tensor_values = _encode_strings_to_bytes(tensor_values) + dims = [] for d in inferred_shape: dims.append(TensorShapeProto.Dim(size=d)) @@ -274,7 +302,7 @@ def make_ndarray(tensor_proto): elif np_dtype == np.complex128: it = iter(tensor_proto.dcomplex_val) values = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=np_dtype) - elif np_dtype == np.bool_ or np_dtype == bool: + elif np_dtype == np.bool_: values = np.fromiter(tensor_proto.bool_val, dtype=np_dtype) else: raise TypeError("Unsupported tensor type: %s" % tensor_proto.dtype) diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py index 35af70d609..1d016eb4a0 100644 --- a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py +++ b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py @@ -20,7 +20,6 @@ from ovmsclient.tfs_compat.base.requests import (PredictRequest, ModelMetadataRequest, ModelStatusRequest, _check_model_spec) from ovmsclient.tfs_compat.grpc.tensors import (NP_TO_TENSOR_MAP, DataType, - _is_bytes_shape_valid, _check_if_array_homogeneous) @@ -167,6 +166,14 @@ def make_status_request(model_name, model_version=0): return HttpModelStatusRequest(model_name, model_version) +# Functioned used with numpy.vectorize() +def _decode_bytes_to_string(tensor_value, encoding="UTF-8"): + return tensor_value.decode(encoding) + + +_decode_bytes_to_strings = np.vectorize(_decode_bytes_to_string) + + def _parse_input_data(values): # create numpy ndarray from values and find its dtype if not provided @@ -187,14 +194,14 @@ def 
_parse_input_data(values): else: raise TypeError("provided values type is not valid") - if dtype == DataType.DT_STRING and _is_bytes_shape_valid(tensor_values.shape, tensor_values): - raise ValueError("bytes values with dtype DT_STRING must be in shape [N]") - - if dtype == DataType.DT_STRING: - b64_values = [] - for value in tensor_values: - b64_value = base64.b64encode(value).decode('utf-8') - b64_values.append({"b64": b64_value}) - return b64_values - else: - return tensor_values.tolist() + if dtype == DataType.DT_STRING and tensor_values.dtype.type == np.bytes_: + if len(tensor_values.shape) != 1: + # Bytes in multidimensional shape will be encoded in UTF-8 as regular input + tensor_values = _decode_bytes_to_strings(tensor_values) + else: + b64_values = [] + for value in tensor_values: + b64_value = base64.b64encode(value).decode('utf-8') + b64_values.append({"b64": b64_value}) + return b64_values + return tensor_values.tolist() diff --git a/client/python/ovmsclient/lib/scripts/build_tfs_api.sh b/client/python/ovmsclient/lib/scripts/build_tfs_api.sh index 6eceee3102..451ae08372 100755 --- a/client/python/ovmsclient/lib/scripts/build_tfs_api.sh +++ b/client/python/ovmsclient/lib/scripts/build_tfs_api.sh @@ -36,6 +36,7 @@ mkdir compiled_protos cp -R tf/tensorflow ovmsclient/tfs_compat/protos/tensorflow cp -R tfs/tensorflow_serving ovmsclient/tfs_compat/protos/tensorflow_serving find ovmsclient/tfs_compat/protos -name "*.proto" -exec sed -i -E 's/import "tensorflow/import "ovmsclient\/tfs_compat\/protos\/tensorflow/g' {} \; +python3 scripts/rename_proto_package.py protoc --proto_path=$PWD --python_out=$PWD/compiled_protos \ $PWD/ovmsclient/tfs_compat/protos/tensorflow/core/framework/*.proto \ diff --git a/client/python/ovmsclient/lib/scripts/rename_proto_package.py b/client/python/ovmsclient/lib/scripts/rename_proto_package.py new file mode 100644 index 0000000000..363f62b270 --- /dev/null +++ b/client/python/ovmsclient/lib/scripts/rename_proto_package.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +base_path = "ovmsclient/tfs_compat/protos" + +# Find all proto files +proto_paths = [] +for root, subdirs, files in os.walk(base_path): + for file in files: + if file.endswith(".proto"): + file_path = os.path.join(root, file) + proto_paths.append(file_path) + +# Replace package name if defined in all proto files +replacement_map = { + "package tensorflow": "package ovmsclient", + " tensorflow.": " ovmsclient.", + " .tensorflow.": " .ovmsclient." 
+} + +for proto_path in proto_paths: + with open(proto_path, 'rt') as file : + filedata = file.read() + + for to_replace, replace_with in replacement_map.items(): + filedata = filedata.replace(to_replace, replace_with) + + with open(proto_path, 'wt') as file: + file.write(filedata) diff --git a/client/python/ovmsclient/lib/setup.py b/client/python/ovmsclient/lib/setup.py index b406a9c5e2..920c7f132e 100644 --- a/client/python/ovmsclient/lib/setup.py +++ b/client/python/ovmsclient/lib/setup.py @@ -49,7 +49,7 @@ def run(self): setuptools.setup( name="ovmsclient", - version="2022.3", + version="2023.0", license="Apache License 2.0", author="Intel Corporation", author_email="ovms.engineering@intel.com", diff --git a/client/python/ovmsclient/lib/tests/config.py b/client/python/ovmsclient/lib/tests/config.py index 6a0603cb73..a204ac5dd0 100644 --- a/client/python/ovmsclient/lib/tests/config.py +++ b/client/python/ovmsclient/lib/tests/config.py @@ -15,6 +15,7 @@ # from enum import IntEnum +import numpy as np class CallCount(IntEnum): @@ -337,7 +338,7 @@ class CallCount(IntEnum): TypeError, "inputs keys type should be str, but found int" ), ( - [{"input": [1, 2, "three"]}, "model_name", 1, 1], + [{"input": np.array(b'1', dtype=np.dtype(np.void, 10))}, "model_name", 1, 1], TypeError, "provided values type is not valid" ), ( diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py index a12d8452ef..5a809e6a4b 100644 --- a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py +++ b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py @@ -176,14 +176,6 @@ "input1": (1, 2, 3) }, 'model_name', 0, TypeError, "values type should be (list, np.ndarray, scalar), but is tuple"), - ({ - "input1": [ - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])], - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])] - ] - }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"), ] # (inputs_dict, @@ -222,7 +214,9 @@ TensorShapeProto.Dim(size=3)]), tensor_content=array([1, 2, 3, 4, 5, 6]).tobytes()), "input2": 5.0, - "input3": bytes([1, 2, 3]) + "input3": bytes([1, 2, 3]), + "input4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]], + "input5": [["list", "of", "strings"]], }, { "input2": { "field": "float_val", @@ -235,7 +229,21 @@ "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]), "dtype": DataType.DT_STRING, 'value': [bytes([1, 2, 3])] - } + }, + "input4": { + "field": "string_val", + "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=2), + TensorShapeProto.Dim(size=2)]), + "dtype": DataType.DT_STRING, + 'value': [bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3])] + }, + "input5": { + "field": "string_val", + "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), + TensorShapeProto.Dim(size=3)]), + "dtype": DataType.DT_STRING, + 'value': [b'list', b'of', b'strings'] + }, }, 'model_name', 0), ({ @@ -274,9 +282,20 @@ "2": TensorProto(dtype=DataType.DT_STRING, tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]), string_val=[bytes([1, 2, 3])]), + "3": TensorProto(dtype=DataType.DT_STRING, + tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), + TensorShapeProto.Dim(size=3)]), + string_val=[b'list', b'of', b'strings']), + "4": TensorProto(dtype=DataType.DT_STRING, + 
tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=2), + TensorShapeProto.Dim(size=2)]), + string_val=[bytes([1, 2, 3]), bytes([1, 2, 3]), + bytes([1, 2, 3]), bytes([1, 2, 3])]), }, "model_name", 0, { "1463": [bytes([1, 2, 3]), bytes([4, 5])], - "2": [bytes([1, 2, 3])] + "2": [bytes([1, 2, 3])], + "3": [[b'list', b'of', b'strings']], + "4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]] }), ] diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py index fe881fde03..30aca1b11d 100644 --- a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py +++ b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py @@ -348,12 +348,22 @@ def test_make_tensor_proto_valid_scalar(params, expected_shape, expected_dtype, TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]), DataType.DT_STRING, "string_val" ), + ({"values": [[bytes([0x13, 0x00, 0x00, 0x00, 0x08]), bytes([0x13, 0x00, 0x00, 0x00, 0x08])]], + "dtype": DataType.DT_STRING}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=2)]), + DataType.DT_STRING, + "string_val" + ), ]) -def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype, expected_field): +def test_make_tensor_proto_valid_binary(params, expected_shape, expected_dtype, expected_field): tensor_proto = make_tensor_proto(**params) if expected_field == "string_val": - assert tensor_proto.__getattribute__(expected_field) == [params["values"]] + if type(params["values"]) is not list: + assert tensor_proto.__getattribute__(expected_field) == [params["values"]] + else: + assert (tensor_proto.__getattribute__(expected_field) + == np.ravel(params["values"]).tolist()) else: assert (tensor_proto.__getattribute__(expected_field) == np.frombuffer(params["values"], @@ -362,6 +372,31 @@ def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype, assert tensor_proto.tensor_shape == expected_shape +@pytest.mark.parametrize("params, expected_shape", [ + ({"values": "string", "dtype": DataType.DT_STRING}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]) + ), + ({"values": ["list", "of", "strings"], "shape": [3], "dtype": DataType.DT_STRING}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)]) + ), + ({"values": [["nested", "list", "of", "strings"]]}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=4)]) + ), + # Upon numpy array creation it will be casted to numpy.str_ data type + ({"values": [1, 2, "three"]}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)]) + ), + ({"values": [[1, 2], [3, "four"]]}, + TensorShapeProto(dim=[TensorShapeProto.Dim(size=2), TensorShapeProto.Dim(size=2)]) + ), +]) +def test_make_tensor_proto_valid_string(params, expected_shape): + tensor_proto = make_tensor_proto(**params) + assert tensor_proto.string_val == np.ravel(params["values"]).astype(np.bytes_).tolist() + assert tensor_proto.dtype == DataType.DT_STRING + assert tensor_proto.tensor_shape == expected_shape + + def test_make_tensor_proto_valid_string_to_float_dtype(): values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]) tensor_proto = make_tensor_proto(values=values, shape=[3], dtype=DataType.DT_INT16) @@ -625,40 +660,6 @@ def test_make_tensor_proto_invalid_values_type(): assert str(exception) == "values type should be (list, np.ndarray, scalar), but is tuple" -def test_make_tensor_proto_invalid_string_2D_array(): - values = bytes([0x13, 0x00, 0x00, 0x00, 
0x08, 0x00]) - with pytest.raises(ValueError) as exception_info: - make_tensor_proto(values=[[values, values], [values, values]], - shape=None, dtype=DataType.DT_STRING) - exception = exception_info.value - assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]" - - -def test_make_tensor_proto_invalid_string_reshape(): - values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]) - with pytest.raises(ValueError) as exception_info: - make_tensor_proto(values=values, shape=[6], dtype=DataType.DT_STRING) - exception = exception_info.value - assert str(exception) == "cannot reshape array of size 1 into shape (6,)" - - -def test_make_tensor_proto_invalid_string_reshape_2(): - values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]) - with pytest.raises(ValueError) as exception_info: - make_tensor_proto(values=values, shape=[2, 3], dtype=DataType.DT_STRING) - exception = exception_info.value - assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]" - - -def test_make_tensor_proto_invalid_string_2D_array_with_shape(): - values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]) - with pytest.raises(ValueError) as exception_info: - make_tensor_proto(values=[[values, values], [values, values]], - shape=[2, 2], dtype=DataType.DT_STRING) - exception = exception_info.value - assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]" - - def test_make_tensor_proto_invalid_int_reshape(): values = [1, 2, 3] with pytest.raises(ValueError) as exception_info: @@ -673,11 +674,3 @@ def test_make_tensor_proto_invalid_empty_list_of_empty_lists_reshape(): make_tensor_proto(values=values, shape=[4, 2], dtype=DataType.DT_INT8) exception = exception_info.value assert str(exception) == "cannot reshape array of size 0 into shape (4,2)" - - -def test_make_tensor_proto_invalid_dtype_provided(): - values = [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]] - with pytest.raises(ValueError) as exception_info: - make_tensor_proto(values=values, shape=None, dtype=DataType.DT_STRING) - exception = exception_info.value - assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]" diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py b/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py index f31031ba62..66d43e52dd 100644 --- a/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py +++ b/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py @@ -20,6 +20,7 @@ import requests from http import HTTPStatus from numpy import array, int32, float32, float128 +import numpy as np from ovmsclient.tfs_compat.protos.tensorflow.core.protobuf.error_codes_pb2 import Code as ErrorCode from ovmsclient.tfs_compat.base.errors import InvalidInputError, ModelNotFoundError @@ -57,14 +58,6 @@ "input1": (1, 2, 3) }, 'model_name', 0, TypeError, "values type should be (list, np.ndarray, scalar), but is tuple"), - ({ - "input1": [ - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])], - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])] - ] - }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"), ] # (inputs_dict, @@ -85,11 +78,13 @@ ({ "input1": 5.0, - "input2": bytes([1, 2, 3]) + "input2": bytes([1, 2, 3]), + "input3": ["list", "of", "strings"] }, json.dumps({ "inputs": { "input1": [5.0], - "input2": [{"b64": "AQID"}] + "input2": [{"b64": "AQID"}], + "input3": ["list", "of", "strings"] } }), 'model_name', 0), @@ -119,7 +114,10 @@ 
([1, 2, 3.0], [1.0, 2.0, 3.0]), ([bytes([1, 2, 3]), bytes([4, 5, 6]), bytes([7, 8, 9])], - [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}]) + [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}]), + + ([[bytes([111, 118]), bytes([109, 115])], [bytes([111, 118]), bytes([109, 115])]], + [["ov", "ms"], ["ov", "ms"]]) ] # (inputs_dict, @@ -136,23 +134,13 @@ "The requested array has an inhomogeneous shape after 2 dimensions. " "The detected shape was (3, 1) + inhomogeneous part.")), - ([1, 2, 3, "str"], - TypeError, "provided values type is not valid"), - - ([[1, 2], [3, 4], ["five", 6]], + (np.array(b'1', dtype=np.dtype(np.void, 10)), TypeError, "provided values type is not valid"), (float128(2.5), TypeError, "provided values type is not valid"), ((1, 2, 3), TypeError, "values type should be (list, np.ndarray, scalar), but is tuple"), - - ([ - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])], - [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]), - bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])] - ], ValueError, "bytes values with dtype DT_STRING must be in shape [N]"), ] # (config_dict, diff --git a/demos/README.md b/demos/README.md index aa7b285cc7..ace1a21a46 100644 --- a/demos/README.md +++ b/demos/README.md @@ -15,6 +15,8 @@ ovms_demo_single_face_analysis_pipeline ovms_demo_multi_faces_analysis_pipeline ovms_docs_demo_ensemble + ovms_docs_demo_mediapipe_image_classification + ovms_docs_demo_mediapipe_multi_model ovms_docs_image_classification ovms_demo_using_onnx_model ovms_demo_tf_classification @@ -23,6 +25,7 @@ ovms_demo_real_time_stream_analysis ovms_demo_bert ovms_demo_gptj_causal_lm + ovms_demo_universal-sentence-encoder ovms_demo_speech_recognition ovms_demo_benchmark_client @@ -48,6 +51,7 @@ OpenVINO Model Server demos have been created to showcase the usage of the model |[Real Time Stream Analysis](real_time_stream_analysis/python/README.md)| Analyze RTSP video stream in real time with generic application template for custom pre and post processing routines as well as simple results visualizer for displaying predictions in the browser. | |[Natural Language Processing with BERT](bert_question_answering/python/README.md)|Provide a knowledge source and a query and use BERT model for question answering use case via gRPC API. This demo uses dynamic shape feature. | |[GPT-J Causal Language Modeling](gptj_causal_lm/python/README.md)|Write start of the sentence and let GPT-J continue via gRPC API. This demo uses dynamic shape feature. | +|[Using inputs data in string format with universal-sentence-encoder model](universal-sentence-encoder/README.md)| Handling AI model with text as the model input. | |[Speech Recognition on Kaldi Model](speech_recognition_with_kaldi_model/python/README.md)|Run inference on a speech sample and use Kaldi model to perform speech recognition via gRPC API. This demo uses [stateful model](../docs/stateful_models.md). | |[Benchmark App](benchmark/python/README.md)|Generate traffic and measure performance of the model served in OpenVINO Model Server.| |[Face Blur Pipeline](face_blur/python/README.md)|Detect faces and blur image using a pipeline of object detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [face_blur custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/src/custom_nodes/face_blur). 
| diff --git a/demos/benchmark/python/client.py b/demos/benchmark/python/client.py index b6c7e1540e..b3e834a86f 100644 --- a/demos/benchmark/python/client.py +++ b/demos/benchmark/python/client.py @@ -363,6 +363,8 @@ def __fix_shape_and_type(self, input_name): dtype = tensorflow.dtypes.int32 elif dtype == self.DTYPE_INT_64: dtype = tensorflow.dtypes.int64 + elif dtype == self.DTYPE_UINT_8: + dtype = tensorflow.dtypes.uint8 else: raise ValueError(f"not supported type: {dtype}") return shape, dtype diff --git a/demos/bert_question_answering/python/Dockerfile b/demos/bert_question_answering/python/Dockerfile index da090d2612..b96faef4c9 100644 --- a/demos/bert_question_answering/python/Dockerfile +++ b/demos/bert_question_answering/python/Dockerfile @@ -22,4 +22,4 @@ COPY bert_question_answering.py tokens_bert.py html_reader.py requirements.txt . ADD https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/bert-small-uncased-whole-word-masking-squad-int8-0002/vocab.txt . RUN pip3 install --upgrade pip && pip3 install --no-cache-dir -r requirements.txt -ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/wiki/BERT_(language_model)", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"] +ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"] diff --git a/demos/bert_question_answering/python/README.md b/demos/bert_question_answering/python/README.md index 17b7f46c15..e9fa9905d3 100644 --- a/demos/bert_question_answering/python/README.md +++ b/demos/bert_question_answering/python/README.md @@ -31,30 +31,30 @@ docker run -it --network host -e no_proxy=localhost bert-client:latest --grpc_ad Docker image with BERT client by default start the container with a command: ``` -python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/wiki/BERT_(language_model)" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids +python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids ``` You can change the entrypoint to adjust to different parameters Example of the output snippet: ```bash question: what is bert -[ INFO ] Sequence of length 395 is processed with 1.85 requests/sec (0.54 sec per request) -[ INFO ] Sequence of length 368 is processed with 2.74 requests/sec (0.36 sec per request) -[ INFO ] Sequence of length 32 is processed with 16.51 requests/sec (0.061 sec per request) +[ INFO ] Sequence of length 418 is processed with 9.91 requests/sec (0.1 sec per request) +[ INFO ] Sequence of length 305 is processed with 12.08 requests/sec (0.083 sec per request) +[ INFO ] Sequence of length 349 is processed with 12.14 requests/sec (0.082 sec per request) +[ INFO ] Sequence of length 110 is processed with 17.98 requests/sec (0.056 sec per request) [ INFO ] The performance below is reported only for reference purposes, please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements. 
-[ INFO ] 3 requests were processed in 1.00sec (0.33sec per request) -[ INFO ] ---answer: 0.26 deeply bidirectional, unsupervised language representation -[ INFO ] When BERT was published, it achieved state-of-the-art performance on a number of natural language understanding tasks:[1] -The reasons for BERT's state-of-the-art performance on these natural language understanding tasks are not yet well understood.[8][9] Current research has focused on investigating the relationship behind BERT's output as a result of carefully chosen input sequences,[10][11] analysis of internal vector representations through probing classifiers,[12][13] and the relationships represented by attention weights.[8][9] -BERT has its origins from pre-training contextual representations including semi-supervised sequence learning,[14] generative pre-training, ELMo,[15] and ULMFit.[16] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence. -On October 25, 2019, Google Search announced that they had started applying BERT models for English language search queries within the US.[17] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[18] In October 2020, almost every single English-based query was processed by BERT.[19] - -[ INFO ] ---answer: 0.22 Bidirectional Encoder Representations from Transformers -[ INFO ] Bidirectional Encoder Representations from Transformers (BERT) is a transformer-based machine learning technique for natural language processing (NLP) pre-training developed by Google. BERT was created and published in 2018 by Jacob Devlin and his colleagues from Google.[1][2] In 2019, Google announced that it had begun leveraging BERT in its search engine, and by late 2020 it was using BERT in almost every English-language query. A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in NLP experiments", counting over 150 research publications analyzing and improving the model.[3] -The original English-language BERT has two models:[1] (1) the BERTBASE: 12 encoders with 12 bidirectional self-attention heads, and (2) the BERTLARGE: 24 encoders with 16 bidirectional self-attention heads. Both models are pre-trained from unlabeled data extracted from the BooksCorpus[4] with 800M words and English Wikipedia with 2,500M words. -BERT is at its core a transformer language model with a variable number of encoder layers and self-attention heads. The architecture is "almost identical" to the original transformer implementation in Vaswani et al. (2017).[5] -BERT was pretrained on two tasks: language modeling (15% of tokens were masked and BERT was trained to predict them from context) and next sentence prediction (BERT was trained to predict if a chosen next sentence was probable or not given the first sentence). As a result of the training process, BERT learns contextual embeddings for words. 
After pretraining, which is computationally expensive, BERT can be finetuned with fewer resources on smaller datasets to optimize its performance on specific tasks.[1][6] - -[ INFO ] ---answer: 0.36 The research paper describing BERT -[ INFO ] The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[20] +[ INFO ] 4 requests were processed in 0.34sec (0.086sec per request) +[ INFO ] ---answer: 0.35 Bidirectional Encoder Representations from Transformers +[ INFO ] This is the current revision of this page, as edited by Mandarax (talk | contribs) at 19:01, 8 April 2023 (Correct capitalization). The present address (URL) is a permanent link to this version.Bidirectional Encoder Representations from Transformers (BERT) is a family of masked-language models introduced in 2018 by researchers at Google.[1][2] A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in Natural Language Processing (NLP) experiments counting over 150 research publications analyzing and improving the model."[3] +BERT was originally implemented in the English language at two model sizes:[1] (1) BERTBASE: 12 encoders with 12 bidirectional self-attention heads totaling 110 million parameters, and (2) BERTLARGE: 24 encoders with 16 bidirectional self-attention heads totaling 340 million parameters. Both models were pre-trained on the Toronto BookCorpus[4] (800M words) and English Wikipedia (2,500M words). +BERT is based on the transformer architecture. Specifically, BERT is composed of Transformer encoder layers. +BERT was pre-trained simultaneously on two tasks: language modeling (15% of tokens were masked, and the training objective was to predict the original token given its context) and next sentence prediction (the training objective was to classify if two spans of text appeared sequentially in the training corpus).[5] As a result of this training process, BERT learns latent representations of words and sentences in context. After pre-training, BERT can be fine-tuned with fewer resources on smaller datasets to optimize its performance on specific tasks such as NLP tasks (language inference, text classification) and sequence-to-sequence based language generation tasks (question-answering, conversational response generation).[1][6] The pre-training stage is significantly more computationally expensive than fine-tuning. + +[ INFO ] ---answer: 0.14 deeply bidirectional, unsupervised language representation +[ INFO ] In contrast to deep learning neural networks which require very large amounts of data, BERT has already been pre-trained which means that it has learnt the representations of the words and sentences as well as the underlying semantic relations that they are connected with. BERT can then be fine-tuned on smaller datasets for specific tasks such as sentiment classification. The pre-trained models are chosen according to the content of the given dataset one uses but also the goal of the task. For example, if the task is a sentiment classification task on financial data, a pre-trained model for the analysis of sentiment of financial text should be chosen. The weights of the original pre-trained models were released on GitHub.[16] +BERT was originally published by Google researchers Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 
The design has its origins from pre-training contextual representations, including semi-supervised sequence learning,[17] generative pre-training, ELMo,[18] and ULMFit.[19] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence. + +[ INFO ] ---answer: 0.12 BERT models for English language search queries within the US +[ INFO ] On October 25, 2019, Google announced that they had started applying BERT models for English language search queries within the US.[20] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[21] In October 2020, almost every single English-based query was processed by a BERT model.[22] +The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[23] ``` diff --git a/demos/bert_question_answering/python/bert_question_answering.py b/demos/bert_question_answering/python/bert_question_answering.py index 0ddc7ee13a..4bb294e043 100644 --- a/demos/bert_question_answering/python/bert_question_answering.py +++ b/demos/bert_question_answering/python/bert_question_answering.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2021 Intel Corporation +# Copyright (c) 2021-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,11 +23,8 @@ from tokens_bert import text_to_tokens, load_vocab_file, Token from html_reader import get_paragraphs -import grpc import numpy as np -from tensorflow import make_tensor_proto, make_ndarray -from tensorflow_serving.apis import predict_pb2 -from tensorflow_serving.apis import prediction_service_pb2_grpc +import ovmsclient class ConcatenatedParagraph(): def __init__(self, text="", tokens=[]): @@ -102,8 +99,7 @@ def main(): args = build_argparser().parse_args() # create grpc connection - channel = grpc.insecure_channel("{}:{}".format(args.grpc_address,args.grpc_port)) - stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + client = ovmsclient.make_grpc_client("{}:{}".format(args.grpc_address,args.grpc_port)) if args.colors: COLOR_RED = "\033[91m" @@ -183,23 +179,9 @@ def main(): if len(input_names)>3: inputs[input_names[3]] = np.arange(input_ids_length, dtype=np.int64)[None,:] - - #print("inputs:",inputs) - - # create grpc prediction request - request = predict_pb2.PredictRequest() - request.model_spec.name = args.model_name - for inp_name in inputs: - request.inputs[inp_name].CopyFrom(make_tensor_proto(inputs[inp_name], shape=(inputs[inp_name].shape))) - t_start = time.perf_counter() - result = stub.Predict(request, 10.0) # result includes a dictionary with all model outputs + res = client.predict(inputs, args.model_name, timeout=10.0) t_end = time.perf_counter() - #print("\nresult:", result) - res = {} - for out_name in output_names: - # print("out_name:",out_name) - res[out_name] = make_ndarray(result.outputs[out_name]) t_count += 1 log.info("Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)".format( diff --git a/demos/bert_question_answering/python/requirements.txt b/demos/bert_question_answering/python/requirements.txt index a553919019..0deafd1de3 100644 --- a/demos/bert_question_answering/python/requirements.txt +++ b/demos/bert_question_answering/python/requirements.txt @@ -1,2 +1,2 @@ -tensorflow-serving-api==2.11.0 +ovmsclient validators diff --git a/demos/common/cpp/.bazelrc b/demos/common/cpp/.bazelrc index 40ca1e660c..bf1c4f30f3 100644 --- a/demos/common/cpp/.bazelrc +++ b/demos/common/cpp/.bazelrc @@ -62,6 +62,10 @@ build --cxxopt=-fno-strict-overflow build --cxxopt=-fno-delete-null-pointer-checks build --cxxopt=-fwrapv build --cxxopt=-fstack-protector +build --cxxopt=-fstack-clash-protection +build --cxxopt=-Wformat +build --cxxopt=-Wformat-security +build --cxxopt=-Werror=format-security # Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF # compilation options. 
It also addresses memory use due to diff --git a/demos/common/cpp/Dockerfile b/demos/common/cpp/Dockerfile index bb0e47805a..dcb04fd8d6 100644 --- a/demos/common/cpp/Dockerfile +++ b/demos/common/cpp/Dockerfile @@ -101,7 +101,7 @@ RUN bazel build \ @com_github_grpc_grpc//:grpc++ \ @com_google_protobuf//:protobuf_lite \ @org_tensorflow//tensorflow/core:lib \ - @opencv//:opencv + @linux_opencv//:opencv COPY src/ /build/src/ diff --git a/demos/common/cpp/WORKSPACE b/demos/common/cpp/WORKSPACE index 3479907a50..69bea5d63c 100644 --- a/demos/common/cpp/WORKSPACE +++ b/demos/common/cpp/WORKSPACE @@ -67,7 +67,7 @@ grpc_extra_deps() ##################### OPEN CV ###################### # OPENCV DEFINITION FOR BUILDING FROM BINARY RELEASE: ########################## new_local_repository( - name = "opencv", + name = "linux_opencv", build_file = "@//third_party/opencv:BUILD", path = "/opt/opencv", ) diff --git a/demos/common/cpp/src/BUILD b/demos/common/cpp/src/BUILD index cf52e6d471..04b4779d06 100644 --- a/demos/common/cpp/src/BUILD +++ b/demos/common/cpp/src/BUILD @@ -41,7 +41,7 @@ cc_binary( "@com_google_protobuf//:protobuf_lite", "@org_tensorflow//tensorflow/core:framework", "@org_tensorflow//tensorflow/core:lib", - "@opencv//:opencv", + "@linux_opencv//:opencv", ] ) @@ -57,7 +57,7 @@ cc_binary( "@com_google_protobuf//:protobuf_lite", "@org_tensorflow//tensorflow/core:framework", "@org_tensorflow//tensorflow/core:lib", - "@opencv//:opencv", + "@linux_opencv//:opencv", ] ) diff --git a/demos/gptj_causal_lm/python/.gitignore b/demos/gptj_causal_lm/python/.gitignore index 5f48824542..b792e9c0e4 100644 --- a/demos/gptj_causal_lm/python/.gitignore +++ b/demos/gptj_causal_lm/python/.gitignore @@ -1,3 +1,4 @@ .venv/ local-pt-checkpoint/ -onnx/ \ No newline at end of file +onnx/ +workspace/ diff --git a/demos/gptj_causal_lm/python/Makefile b/demos/gptj_causal_lm/python/Makefile new file mode 100644 index 0000000000..636d6eabe8 --- /dev/null +++ b/demos/gptj_causal_lm/python/Makefile @@ -0,0 +1,38 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +BLINGFIRE_SHA=5089d31914cbed7a24589e753bd6cd362a377fbb + +.PHONY: setup + +default: setup + +setup: +# Build custom node + cd ../../../src/custom_nodes/tokenizer && \ + make BASE_OS=ubuntu + mkdir -p workspace/lib && \ + cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libdetokenizer.so workspace/lib/libdetokenizer.so && \ + cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libtokenizer.so workspace/lib/libtokenizer.so +# Prepare tokenization models + mkdir -p workspace/tokenizers + wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.bin -O workspace/tokenizers/gpt2.bin + wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.i2w -O workspace/tokenizers/gpt2.i2w +# Copy configuration file to workspace directory + cp config.json workspace/. 
+
+clean:
+	@rm -rf workspace
\ No newline at end of file
diff --git a/demos/gptj_causal_lm/python/README.md b/demos/gptj_causal_lm/python/README.md
index 18fce21916..d38e82c27e 100755
--- a/demos/gptj_causal_lm/python/README.md
+++ b/demos/gptj_causal_lm/python/README.md
@@ -101,3 +101,58 @@ tensor([[[ 8.4078, 7.2025, 5.1148, ..., -6.6914, -6.7891, -6.6537],
 grad_fn=)
 predicted word: a
 ```
+
+# Pipeline mode with server side tokenization and detokenization
+
+This variant offloads the tokenization and detokenization steps from the client to the server. OVMS can convert a string proto to a `2D U8` tensor and pass the data to the tokenization custom node. This way we generate tokens for the `gpt-j-6b` model automatically and get the response as text instead of a probability vector.
+
+![diagram](../../../src/custom_nodes/tokenizer/diagram.svg)
+
+## Prepare environment
+
+Use the `make` command to prepare the custom node libraries, the blingfire tokenization models and the configuration file.
+
+```bash
+make
+```
+
+The workspace should look as follows:
+
+```bash
+tree workspace
+workspace
+├── config.json
+├── lib
+│   ├── libdetokenizer.so
+│   └── libtokenizer.so
+└── tokenizers
+    ├── gpt2.bin
+    └── gpt2.i2w
+
+2 directories, 5 files
+```
+
+Start OVMS with the prepared workspace:
+
+```bash
+docker run -d --rm -p 9000:9000 \
+    -v $(pwd)/onnx:/onnx:ro \
+    -v $(pwd)/workspace:/workspace:ro \
+    openvino/model_server \
+    --port 9000 \
+    --config_path /workspace/config.json
+```
+
+Install the TensorFlow Serving API package:
+```bash
+pip install --upgrade pip
+pip install tensorflow-serving-api==2.11.0
+```
+
+Run the example client:
+
+```bash
+python3 dag_client.py --url localhost:9000 --model_name my_gpt_pipeline --input "Neurons are fascinating"
+
+0.5515012741088867 Neurons are fascinating cells that are responsible for the transmission of information from one brain region to another. They are also responsible for the production of hormones and neurotransmitters that are responsible for the regulation of mood, sleep, appetite, and sexual function.
+``` diff --git a/demos/gptj_causal_lm/python/app.py b/demos/gptj_causal_lm/python/app.py index 59ab24a8a7..d265c92c5d 100644 --- a/demos/gptj_causal_lm/python/app.py +++ b/demos/gptj_causal_lm/python/app.py @@ -48,13 +48,11 @@ predicted_token_id = token = torch.argmax(torch.nn.functional.softmax(torch.Tensor(results[0,-1,:]),dim=-1),dim=-1) word = tokenizer.decode(predicted_token_id) input_sentence += word - # print(f"Iteration: {iteration}\nLast predicted token: {predicted_token_id}\nLast latency: {last_latency}s\n{input_sentence}") print(word, end='', flush=True) iteration += 1 if predicted_token_id == args['eos_token_id']: break -# split line below to 3 different lines print(f"Number of iterations: {iteration}") print(f"First latency: {first_latency}s") print(f"Last latency: {last_latency}s") diff --git a/demos/gptj_causal_lm/python/config.json b/demos/gptj_causal_lm/python/config.json new file mode 100644 index 0000000000..c4b4b9f1d4 --- /dev/null +++ b/demos/gptj_causal_lm/python/config.json @@ -0,0 +1,123 @@ +{ + "model_config_list": [ + {"config": { + "name": "gpt", + "base_path": "/onnx", + "plugin_config": {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": 1}}} + ], + "pipeline_config_list": [ + { + "name": "my_gpt_pipeline", + "inputs": [ + "texts" + ], + "nodes": [ + { + "name": "node_1", + "type": "custom", + "inputs": [ + { + "texts": { + "node_name": "request", + "data_item": "texts" + } + } + ], + "outputs": [ + { + "data_item": "input_ids", + "alias": "out" + }, + { + "data_item": "attention_mask", + "alias": "attention" + } + ], + "library_name": "tokenizer", + "params": { + "max_ids_arr_length": "4096", + "model_path": "/workspace/tokenizers/gpt2.bin" + } + }, + { + "name": "gpt_node", + "model_name": "gpt", + "type": "DL model", + "inputs": [ + { + "input_ids": { + "node_name": "node_1", + "data_item": "out" + } + }, + { + "attention_mask": { + "node_name": "node_1", + "data_item": "attention" + } + } + ], + "outputs": [ + { + "data_item": "logits", + "alias": "logits" + } + ] + }, + { + "name": "node_2", + "type": "custom", + "inputs": [ + { + "logits": { + "node_name": "gpt_node", + "data_item": "logits" + } + }, + { + "input_ids": { + "node_name": "node_1", + "data_item": "out" + } + }, + { + "attention_mask": { + "node_name": "node_1", + "data_item": "attention" + } + } + ], + "outputs": [ + { + "data_item": "texts", + "alias": "texts" + } + ], + "library_name": "detokenizer", + "params": { + "max_buffer_length": "8192", + "model_path": "/workspace/tokenizers/gpt2.i2w" + } + } + ], + "outputs": [ + { + "autocompletions_string": { + "node_name": "node_2", + "data_item": "texts" + } + } + ] + } + ], + "custom_node_library_config_list": [ + { + "name": "tokenizer", + "base_path": "/workspace/lib/libtokenizer.so" + }, + { + "name": "detokenizer", + "base_path": "/workspace/lib/libdetokenizer.so" + } + ] +} diff --git a/demos/gptj_causal_lm/python/dag_client.py b/demos/gptj_causal_lm/python/dag_client.py new file mode 100644 index 0000000000..e81ec9c183 --- /dev/null +++ b/demos/gptj_causal_lm/python/dag_client.py @@ -0,0 +1,48 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import time +import argparse +import grpc +import numpy as np +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2_grpc +from tensorflow import make_tensor_proto, make_ndarray + +parser = argparse.ArgumentParser(description='Demo for GPT-J causal LM DAG requests using Tensorflow Serving gRPC API') + +parser.add_argument('--input', required=True, help='Beginning of a sentence', type=str) +parser.add_argument('--url', required=False, help='Url to connect to', type=str, default='localhost:9000') +parser.add_argument('--model_name', required=False, help='Model name in the serving', type=str, default='my_gpt_pipeline') +args = vars(parser.parse_args()) + +channel = grpc.insecure_channel(args['url']) +stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + +input_sentence = args['input'] + +predict_request = predict_pb2.PredictRequest() +predict_request.model_spec.name = args['model_name'] + + +while True: + predict_request.inputs['texts'].CopyFrom(make_tensor_proto(np.array([input_sentence]))) + start_time = time.time() + predict_response = stub.Predict(predict_request, 10.0) + latency = time.time() - start_time + results = make_ndarray(predict_response.outputs['autocompletions_string']) + input_sentence = results[0].decode('utf-8') + print(latency, input_sentence) diff --git a/demos/gptj_causal_lm/python/requirements.txt b/demos/gptj_causal_lm/python/requirements.txt index 7e30bb972b..0bfaa08f5b 100755 --- a/demos/gptj_causal_lm/python/requirements.txt +++ b/demos/gptj_causal_lm/python/requirements.txt @@ -1,7 +1,7 @@ -accelerate +accelerate==0.17.1 numpy==1.23 -transformers -transformers[onnx] -onnxruntime -torch -ovmsclient \ No newline at end of file +transformers==4.26.1 +transformers[onnx]==4.26.1 +onnxruntime==1.14.1 +torch==1.13.1 +ovmsclient diff --git a/demos/horizontal_text_detection/python/horizontal_text_detection.py b/demos/horizontal_text_detection/python/horizontal_text_detection.py index 68baad2ee8..141fdd5c2c 100644 --- a/demos/horizontal_text_detection/python/horizontal_text_detection.py +++ b/demos/horizontal_text_detection/python/horizontal_text_detection.py @@ -72,7 +72,7 @@ def get_text(output): def draw_boxes_spotting(frame, result): output = make_ndarray(result.outputs['boxes']) - for i in range(0, 100): # there is returned 200 detections for each image in the batch + for i in range(0, output.shape[0]): # there is returned a dynamic list of boxes detection = output[i,:] if detection[4] > 0.3: x_min = int(detection[0]) diff --git a/demos/mediapipe/image_classification/README.md b/demos/mediapipe/image_classification/README.md new file mode 100644 index 0000000000..16fba3c0a1 --- /dev/null +++ b/demos/mediapipe/image_classification/README.md @@ -0,0 +1,82 @@ +# MediaPipe Image Classification Demo {#ovms_docs_demo_mediapipe_image_classification} + +This guide shows how to implement [MediaPipe](../../../docs/mediapipe.md) graph using OVMS. 
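+
+Before going through the full demo, the request flow can be sketched with the KServe gRPC API that the client later in this guide also uses. This is only a sketch under a few assumptions: the server is already running as described in the steps below, the graph exposes the `in`/`out` streams defined in `graph.pbtxt`, and a random tensor in the `(1,3,224,224)` FP32 layout of `resnet50-binary-0001` stands in for a real image.
+
+```python
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+# Assumes OVMS is already listening on localhost:9000 (see the steps below)
+client = grpcclient.InferenceServerClient(url="localhost:9000")
+
+# Random data only verifies connectivity; the demo client below sends real images
+data = np.random.rand(1, 3, 224, 224).astype(np.float32)
+inputs = [grpcclient.InferInput("in", [1, 3, 224, 224], "FP32")]
+inputs[0].set_data_from_numpy(data)
+
+results = client.infer(model_name="resnetMediapipe", inputs=inputs)
+probabilities = results.as_numpy("out")  # expected shape (1, 1000) for resnet50-binary-0001
+print("Top-1 class id:", int(np.argmax(probabilities)))
+```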
+
+Example usage of a graph that contains only one model - resnet:
+
+## Prepare the repository
+
+Clone the repository and enter the mediapipe image_classification directory
+```bash
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/image_classification
+```
+
+## Download ResNet50 model
+
+```bash
+mkdir -p model/1
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.bin
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.xml
+```
+
+## Run OpenVINO Model Server
+```bash
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+```
+
+## Run the client:
+```bash
+cd model_server/client/python/kserve-api/samples
+
+python grpc_infer_resnet.py --model_name resnetMediapipe --grpc_port 9000 --images_numpy_path ../../imgs.npy --input_name in --output_name out --transpose_input False --labels_numpy_path ../../lbs.npy
+Image data range: 0.0 : 255.0
+Start processing:
+	Model name: resnetMediapipe
+	Iterations: 10
+	Images numpy path: ../../imgs.npy
+	Numpy file shape: (10, 3, 224, 224)
+
+Iteration 1; Processing time: 14.40 ms; speed 69.46 fps
+imagenet top results in a single batch:
+	 0 airliner 404 ; Correct match.
+Iteration 2; Processing time: 10.72 ms; speed 93.32 fps
+imagenet top results in a single batch:
+	 0 Arctic fox, white fox, Alopex lagopus 279 ; Correct match.
+Iteration 3; Processing time: 9.27 ms; speed 107.83 fps
+imagenet top results in a single batch:
+	 0 bee 309 ; Correct match.
+Iteration 4; Processing time: 8.47 ms; speed 118.02 fps
+imagenet top results in a single batch:
+	 0 golden retriever 207 ; Correct match.
+Iteration 5; Processing time: 9.17 ms; speed 109.03 fps
+imagenet top results in a single batch:
+	 0 gorilla, Gorilla gorilla 366 ; Correct match.
+Iteration 6; Processing time: 8.56 ms; speed 116.78 fps
+imagenet top results in a single batch:
+	 0 magnetic compass 635 ; Correct match.
+Iteration 7; Processing time: 8.39 ms; speed 119.16 fps
+imagenet top results in a single batch:
+	 0 peacock 84 ; Correct match.
+Iteration 8; Processing time: 8.44 ms; speed 118.44 fps
+imagenet top results in a single batch:
+	 0 pelican 144 ; Correct match.
+Iteration 9; Processing time: 8.36 ms; speed 119.55 fps
+imagenet top results in a single batch:
+	 0 snail 113 ; Correct match.
+Iteration 10; Processing time: 9.16 ms; speed 109.19 fps
+imagenet top results in a single batch:
+	 0 zebra 340 ; Correct match.
+ +processing time for all iterations +average time: 9.10 ms; average speed: 109.89 fps +median time: 8.50 ms; median speed: 117.65 fps +max time: 14.00 ms; min speed: 71.43 fps +min time: 8.00 ms; max speed: 125.00 fps +time percentile 90: 10.40 ms; speed percentile 90: 96.15 fps +time percentile 50: 8.50 ms; speed percentile 50: 117.65 fps +time standard deviation: 1.76 +time variance: 3.09 +Classification accuracy: 100.00 +``` diff --git a/demos/mediapipe/image_classification/config.json b/demos/mediapipe/image_classification/config.json new file mode 100644 index 0000000000..b82e499487 --- /dev/null +++ b/demos/mediapipe/image_classification/config.json @@ -0,0 +1,4 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [{"name":"resnetMediapipe"}] +} diff --git a/demos/mediapipe/image_classification/graph.pbtxt b/demos/mediapipe/image_classification/graph.pbtxt new file mode 100644 index 0000000000..fae7b39fed --- /dev/null +++ b/demos/mediapipe/image_classification/graph.pbtxt @@ -0,0 +1,45 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "resnet" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:in" + output_stream: "A:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "0" + } + tag_to_output_tensor_names { + key: "A" + value: "1463" + } + } + } +} \ No newline at end of file diff --git a/demos/mediapipe/image_classification/subconfig.json b/demos/mediapipe/image_classification/subconfig.json new file mode 100644 index 0000000000..18e7e84c2f --- /dev/null +++ b/demos/mediapipe/image_classification/subconfig.json @@ -0,0 +1,9 @@ +{ + "model_config_list": [ + {"config": { + "name": "resnet", + "base_path": "model" + } + } + ] +} diff --git a/demos/mediapipe/multi_model_graph/README.md b/demos/mediapipe/multi_model_graph/README.md new file mode 100644 index 0000000000..78beb7078e --- /dev/null +++ b/demos/mediapipe/multi_model_graph/README.md @@ -0,0 +1,33 @@ +# MediaPipe Multi Model Demo {#ovms_docs_demo_mediapipe_multi_model} + +This guide shows how to implement [MediaPipe](../../../docs/mediapipe.md) graph using OVMS. 
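+
+The graph chains two toy models from the OVMS test suite. As a rough sketch of the arithmetic (assuming the `dummy` test model adds 1 to every element and the `add` model sums its two inputs, which matches the client output shown below), the pipeline result can be reproduced in plain NumPy:
+
+```python
+import numpy as np
+
+# The same two inputs the demo client sends to the "dummyAdd" graph
+in1 = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32)
+in2 = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32)
+
+dummy_out = in1 + 1       # assumed behavior of the "dummy" test model
+result = dummy_out + in2  # the "add" model sums the dummy output with the second input
+print(result)             # [[ 3.  5.  7.  9. 11. 13. 15. 17. 19. 21.]]
+```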
+
+Example usage:
+
+## Prepare the repository
+
+Clone the repository and enter the mediapipe multi_model_graph directory
+```bash
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/multi_model_graph
+```
+
+## Prepare the models
+
+```bash
+cp -r ../../../src/test/add_two_inputs_model ./
+cp -r ../../../src/test/dummy ./
+```
+
+## Run OpenVINO Model Server
+Prepare a virtualenv according to the [kserve samples readme](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/README.md).
+```bash
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+```
+
+## Run the client:
+```bash
+python mediapipe_multi_model_client.py --grpc_port 9000
+Output:
+[[ 3.  5.  7.  9. 11. 13. 15. 17. 19. 21.]]
+```
diff --git a/demos/mediapipe/multi_model_graph/config.json b/demos/mediapipe/multi_model_graph/config.json
new file mode 100644
index 0000000000..19a1b44353
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/config.json
@@ -0,0 +1,4 @@
+{
+    "model_config_list": [],
+    "mediapipe_config_list": [{"name":"dummyAdd"}]
+}
diff --git a/demos/mediapipe/multi_model_graph/graph.pbtxt b/demos/mediapipe/multi_model_graph/graph.pbtxt
new file mode 100644
index 0000000000..36a29cb03b
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/graph.pbtxt
@@ -0,0 +1,79 @@
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:dummy" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:add" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:dummy" + input_stream: "DUMMY_IN:in1" + output_stream: "DUMMY_OUT:dummy_output" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "DUMMY_IN" + value: "b" + } + tag_to_output_tensor_names { + key: "DUMMY_OUT" + value: "a" + } + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:add" + input_stream: "ADD_INPUT1:dummy_output" + input_stream: "ADD_INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "ADD_INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "ADD_INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} \ No newline at end of file diff --git a/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py b/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py new file mode 100644 index 0000000000..8ef3014d33 --- /dev/null +++ b/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py @@ -0,0 +1,48 @@ +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import numpy as np +import datetime +import argparse +import tritonclient.grpc as grpcclient + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sends requests via KServe gRPC API using images in numpy format. ' + 'It displays performance statistics and optionally the model accuracy') + parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost') + parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. 
default: 9000') + + args = vars(parser.parse_args()) + + address = "{}:{}".format(args['grpc_address'],args['grpc_port']) + + triton_client = grpcclient.InferenceServerClient( + url=address, + verbose=False) + + inputs = [] + inputs.append(grpcclient.InferInput("in1", [1,10], "FP32")) + inputs.append(grpcclient.InferInput("in2", [1,10], "FP32")) + inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32)) + inputs[1].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32)) + outputs = [] + results = triton_client.infer( + model_name= "dummyAdd", + inputs=inputs, + outputs=outputs) + output = results.as_numpy("out") + print('Output:') + print(output) \ No newline at end of file diff --git a/demos/mediapipe/multi_model_graph/subconfig.json b/demos/mediapipe/multi_model_graph/subconfig.json new file mode 100644 index 0000000000..43c53be16e --- /dev/null +++ b/demos/mediapipe/multi_model_graph/subconfig.json @@ -0,0 +1,14 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "dummy" + } + }, + {"config": { + "name": "add", + "base_path": "add_two_inputs_model" + } + } + ] +} diff --git a/demos/model_ensemble/python/README.md b/demos/model_ensemble/python/README.md index b07f1bcdeb..e91764d3fe 100644 --- a/demos/model_ensemble/python/README.md +++ b/demos/model_ensemble/python/README.md @@ -3,7 +3,7 @@ This guide shows how to implement a model ensemble using the [DAG Scheduler](../../../docs/dag_scheduler.md). - Let's consider you develop an application to perform image classification. There are many different models that can be used for this task. The goal is to combine results from inferences executed on two different models and calculate argmax to pick the most probable classification label. -- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2022.2/omz_models_public_googlenet_v2_tf_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_public_resnet_50_tf_resnet_50_tf.html). Additionally, create own model **argmax** to combine and select top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both - googlenet and resnet predictions should run in parallel. +- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2023.0/omz_models_model_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_model_resnet_50_tf.html#doxid-omz-models-model-resnet-50-tf). Additionally, create own model **argmax** to combine and select top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both - googlenet and resnet predictions should run in parallel. - Diagram for this pipeline would look like this: ![diagram](model_ensemble_diagram.svg) diff --git a/demos/universal-sentence-encoder/README.md b/demos/universal-sentence-encoder/README.md new file mode 100644 index 0000000000..80f42a2695 --- /dev/null +++ b/demos/universal-sentence-encoder/README.md @@ -0,0 +1,108 @@ +# Using inputs data in string format with universal-sentence-encoder model {#ovms_demo_universal-sentence-encoder} + + +## Download the model + +In this experiment we are going to use a TensorFlow model from [tfhub.dev ](https://tfhub.dev/google/universal-sentence-encoder-multilingual/3). 
+
+```bash
+curl --create-dir https://storage.googleapis.com/tfhub-modules/google/universal-sentence-encoder-multilingual/3.tar.gz -o universal-sentence-encoder-multilingual/1/3.tar.gz
+tar -xzf universal-sentence-encoder-multilingual/1/3.tar.gz -C universal-sentence-encoder-multilingual/1/
+rm universal-sentence-encoder-multilingual/1/3.tar.gz
+chmod -R 755 universal-sentence-encoder-multilingual
+tree universal-sentence-encoder-multilingual/
+
+universal-sentence-encoder-multilingual/
+└── 1
+    ├── assets
+    ├── saved_model.pb
+    └── variables
+        ├── variables.data-00000-of-00001
+        └── variables.index
+
+```
+
+## Optionally build OVMS with the CPU extension library for the sentencepiece_tokenizer layer
+
+The universal-sentence-encoder-multilingual model includes a SentencepieceTokenizer layer which is not supported by OpenVINO at the moment. It can, however, be implemented using a [CPU extension](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/custom_operations/user_ie_extensions/sentence_piece), which is a dynamic library performing the execution of the model layer.
+The SentencepieceTokenizer layer expects a list of strings on its input. The CPU extension replaces that input with an array of UINT8 precision and shape `[-1]`. It is a serialized representation of the list of strings in the form of bytes. When this extension is deployed in OpenVINO Model Server, you don't need to worry about the serialization as it is handled internally. The model server accepts the input in string format and performs the conversion to the format required by OpenVINO transparently.
+
+The image `openvino/model_server:2023.0` will include a ready-to-use OpenVINO Model Server with the CPU extension. It can also be built from source using the commands:
+
+```bash
+git clone https://github.com/openvinotoolkit/model_server
+cd model_server
+make docker_build OV_USE_BINARY=0
+cd ..
+
+```
+
+## Start the model server in a container
+When the new docker image is built, you can start the service with the command:
+```bash
+docker run -d --name ovms -p 9000:9000 -p 8000:8000 -v $(pwd)/universal-sentence-encoder-multilingual:/model openvino/model_server:latest --model_name usem --model_path /model --cpu_extension /ovms/lib/libuser_ov_extensions.so --plugin_config '{"NUM_STREAMS": 1}' --port 9000 --rest_port 8000
+```
+
+Check the container logs to confirm a successful start:
+```bash
+docker logs ovms
+```
+
+
+## Send string data as inference request
+
+OpenVINO Model Server can accept the input in the form of strings. Below is a code snippet based on the `tensorflow_serving_api` Python library:
+```python
+data = np.array(["string1", "string2", "string_n"])
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = "my_model"
+predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
+predict_response = prediction_service_stub.Predict(predict_request, 10.0)
+```
+
+Here is a basic client execution:
+```bash
+pip install --upgrade pip
+pip install -r model_server/demos/universal-sentence-encoder/requirements.txt
+python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9000 --string "I enjoy taking long walks along the beach with my dog."
+processing time 6.931 ms.
+Output shape (1, 512) +Output subset [-0.00552395 0.00599533 -0.01480555 0.01098945 -0.09355522 -0.08445048 + -0.02802683 -0.05219319 -0.0675998 0.03127321 -0.03223499 -0.01282092 + 0.06131846 0.02626886 -0.00983501 0.00298059 0.00141201 0.03229365 + 0.06957124 0.01543707] + +``` + +The same can be achieved using REST API interface and even a simple `curl` command: + +```bash +curl -X POST http://localhost:8000/v1/models/usem:predict \ +-H 'Content-Type: application/json' \ +-d '{"instances": ["dog", "Puppies are nice.", "I enjoy taking long walks along the beach with my dog."]}' +``` + + +## Compare results with TFS + +The same client code can be used to send the requests to TensorFlow Serving component. There is full compatibility in the API. + +Start TFS container: +```bash +docker run -it -p 8500:8500 -p 9500:9500 -v $(pwd)/universal-sentence-encoder-multilingual:/models/usem -e MODEL_NAME=usem tensorflow/serving --port=9500 --rest_api_port=8500 +``` + + +Run the client +```bash +python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9500 --input_name inputs --output_name outputs --string "I enjoy taking long walks along the beach with my dog." + +processing time 12.167000000000002 ms. +Output shape (1, 512) +Output subset [-0.00552387 0.00599531 -0.0148055 0.01098951 -0.09355522 -0.08445048 + -0.02802679 -0.05219323 -0.06759984 0.03127313 -0.03223493 -0.01282088 + 0.06131843 0.02626882 -0.00983502 0.00298053 0.00141208 0.03229369 + 0.06957125 0.01543701] + +``` + diff --git a/demos/universal-sentence-encoder/requirements.txt b/demos/universal-sentence-encoder/requirements.txt new file mode 100644 index 0000000000..da859d5245 --- /dev/null +++ b/demos/universal-sentence-encoder/requirements.txt @@ -0,0 +1,2 @@ +tensorflow-serving-api==2.11.0 + diff --git a/demos/universal-sentence-encoder/send_strings.py b/demos/universal-sentence-encoder/send_strings.py new file mode 100644 index 0000000000..5f4de71da3 --- /dev/null +++ b/demos/universal-sentence-encoder/send_strings.py @@ -0,0 +1,50 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import grpc +import numpy as np +from tensorflow import make_tensor_proto, make_ndarray, make_tensor_proto +import datetime +import argparse +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2_grpc + + +parser = argparse.ArgumentParser(description='Do requests to ie_serving and tf_serving using images in string format') +parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost') +parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000') +parser.add_argument('--input_name',required=False, default='inputs', help='Specify input tensor name. default: inputs') +parser.add_argument('--output_name',required=False, default='outputs', help='Specify output name. 
default: outputs') +parser.add_argument('--model_name', default='usem', help='Define model name, must be same as is in service. default: usem') +parser.add_argument('--string',required=True, default='', help='String to query.') +args = vars(parser.parse_args()) + +channel = grpc.insecure_channel("{}:{}".format(args['grpc_address'],args['grpc_port'])) +stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + + +data = np.array([args['string']]) +predict_request = predict_pb2.PredictRequest() +predict_request.model_spec.name = args['model_name'] +predict_request.inputs[args['input_name']].CopyFrom(make_tensor_proto(data)) +start_time = datetime.datetime.now() +predict_response = stub.Predict(predict_request, 10.0) +end_time = datetime.datetime.now() +duration = (end_time - start_time).total_seconds() * 1000 +print("processing time", duration, "ms.") +output = make_ndarray(predict_response.outputs[args['output_name']]) +print("Output shape", output.shape) +print("Output subset", output[0, :20]) diff --git a/demos/using_onnx_model/python/config.json b/demos/using_onnx_model/python/config.json index a64d65e51d..c25117b762 100644 --- a/demos/using_onnx_model/python/config.json +++ b/demos/using_onnx_model/python/config.json @@ -7,7 +7,7 @@ "layout": "NHWC:NCHW", "shape": "(1,224,224,3)", "plugin_config": { - "CPU_THROUGHPUT_STREAMS": "1" + "NUM_STREAMS": "1" } } } diff --git a/docs/accelerators.md b/docs/accelerators.md index aa59c101b6..866059477d 100644 --- a/docs/accelerators.md +++ b/docs/accelerators.md @@ -11,63 +11,15 @@ docker run -u $(id -u):$(id -g) -v ${PWD}/models:/models:rw openvino/ubuntu20_de mv ${PWD}/models/public/resnet-50-tf/FP32 ${PWD}/models/public/resnet-50-tf/1 ``` - -## Starting the server with the Intel® Neural Compute Stick 2 - -[Intel Movidius Neural Compute Stick 2](https://software.intel.com/en-us/neural-compute-stick) can be employed by OVMS OpenVINO Model Server via -[the MYRIAD plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html). It must be visible and accessible on the host machine. - -NCS devices should be reported by the `lsusb` command, printing out `ID 03e7:2485`. - -To start the server with Neural Compute Stick use either of the two options: - -1. Recommended, without the docker privileged mode and mounting only the usb devices. - -@sphinxdirective -.. code-block:: sh - - docker run --rm -it -u 0 --device-cgroup-rule='c 189:* rmw' -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev/bus/usb:/dev/bus/usb -p 9001:9001 openvino/model_server \ - --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD - -@endsphinxdirective - -2. Less securely, in the docker privileged mode and mounting all devices. - ```bash - docker run --rm -it --net=host -u root --privileged -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev:/dev -p 9001:9001 openvino/model_server \ - --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD - ``` - -## Starting a Docker Container with HDDL - -To run a container that is using the HDDL accelerator, _hddldaemon_ must be running on the host machine. -You must set up the environment (the OpenVINO package must be pre-installed) and start _hddldaemon_ on the host before starting a container. -Refer to the steps from [OpenVINO installation guides](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_docker_linux.html#running-the-image-on-intel-vision-accelerator-design-with-intel-movidius-vpus). 
- -An example of a command starting a server with HDDL: -```bash - -# --device=/dev/ion:/dev/ion mounts the accelerator device -# -v /var/tmp:/var/tmp enables communication with _hddldaemon_ running on the host machine -docker run --rm -it --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest \ ---model_path /opt/model --model_name resnet --port 9001 --target_device HDDL -``` - -Check out our recommendations for [throughput optimization on HDDL](performance_tuning.md). - -> **NOTE**: -> the OpenVINO Model Server process within the container communicates with _hddldaemon_ via unix sockets in the `/var/tmp` folder. -> It requires RW permissions in the docker container security context. -> It is recommended to start the docker container in the same context as the account starting _hddldaemon_. For example, if you start the _hddldaemon_ as root, add `--user root` to the `docker run` command. - ## Starting a Docker Container with Intel integrated GPU, Intel® Data Center GPU Flex Series and Intel® Arc™ GPU -The [GPU plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for +The [GPU plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for Deep Neural Networks ([clDNN](https://01.org/cldnn)) to infer deep neural networks. For inference execution, it employs Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics. Before using GPU as OpenVINO Model Server target device, you need to: -- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices) +- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices) - start the docker container with the additional parameter of `--device /dev/dri` to pass the device context - set the parameter of `--target_device` to `GPU`. - use the `openvino/model_server:latest-gpu` image, which contains GPU dependencies @@ -107,7 +59,7 @@ Use device `/dev/dxg` instead of `/dev/dri` and mount the volume `/usr/lib/wsl`: @sphinxdirective .. code-block:: sh - docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \ + docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl -u $(id -u):$(id -g) \ -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest-gpu \ --model_path /opt/model --model_name resnet --port 9001 --target_device GPU @@ -121,7 +73,7 @@ If you need to build the OpenVINO Model Server with different driver version, re ## Using Multi-Device Plugin If you have multiple inference devices available (e.g. Myriad VPUs and CPU) you can increase inference throughput by enabling the Multi-Device Plugin. -It distributes Inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Running_on_multiple_devices.html) documentation. 
+It distributes Inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Running_on_multiple_devices.html) documentation. To use this feature in OpenVINO Model Server, you can choose one of two ways: @@ -136,7 +88,7 @@ echo '{"model_config_list": [ "name": "resnet", "base_path": "/opt/model", "batch_size": "1", - "target_device": "MULTI:MYRIAD,CPU"} + "target_device": "MULTI:GPU,CPU"} }] }' >> models/public/resnet-50-tf/config.json ``` @@ -153,7 +105,7 @@ openvino/model_server:latest --config_path /opt/model/config.json --port 9001 ```bash docker run -d --net=host -u root --privileged --name ie-serving --rm -v ${PWD}/models/public/resnet-50-tf/:/opt/model:ro -v \ -/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:MYRIAD,CPU' +/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:GPU,CPU' ``` The deployed model will perform inference on both Intel Movidius Neural Compute Stick and CPU. @@ -161,7 +113,7 @@ The total throughput will be roughly equal to the sum of CPU and Intel Movidius ## Using Heterogeneous Plugin -The [HETERO plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model +The [HETERO plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model among several computing devices. That way different parts of the deep learning network can be executed by devices best suited to their type of calculations. OpenVINO automatically divides the network to optimize the process. @@ -183,7 +135,7 @@ echo '{"model_config_list": [ ## Using AUTO Plugin -[Auto Device](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit, it doesn’t bind to a specific type of HW device. +[Auto Device](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit, it doesn’t bind to a specific type of HW device. AUTO solves the complexity in application required to code a logic for the HW device selection (through HW devices) and then, on the deducing the best optimization settings on that device. AUTO always chooses the best device, if compiling model fails on this device, AUTO will try to compile it on next best device until one of them succeeds. Make sure you have passed the devices and access to the devices you want to use in for the docker image. For example with: @@ -243,16 +195,22 @@ The docker image of OpenVINO Model Server including support for NVIDIA can be bu ```bash git clone https://github.com/openvinotoolkit/model_server.git cd model_server - make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3 + make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master + cd .. ``` Check also [building from sources](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md). 
Example command to run container with NVIDIA support: ```bash - docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA + docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device NVIDIA +``` + +For models with layers not supported by the NVIDIA plugin, you can use the virtual plugin `HETERO`, which can use multiple devices listed after the colon: +```bash + docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device HETERO:NVIDIA,CPU ``` Check the supported [configuration parameters](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-configuration-parameters) and [supported layers](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-layers-and-limitations) -Currently the AUTO, MULTI and HETERO virual plugins do not support NVIDIA plugin as an alternative device. \ No newline at end of file +Currently the AUTO and MULTI virtual plugins do not support the NVIDIA plugin as an alternative device. \ No newline at end of file diff --git a/docs/advanced_topics.md b/docs/advanced_topics.md index 1f9ab27931..4804027758 100644 --- a/docs/advanced_topics.md +++ b/docs/advanced_topics.md @@ -19,7 +19,7 @@ Implement any CPU layer, that is not support by OpenVINO yet, as a shared librar [Learn more](../src/example/SampleCpuExtension/README.md) ## Model Cache -Leverage the OpenVINO [model caching](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device. +Leverage the OpenVINO [model caching](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device. [Learn more](model_cache.md) diff --git a/docs/binary_input.md b/docs/binary_input.md index 67eb8e9fe2..a9a3b349fc 100644 --- a/docs/binary_input.md +++ b/docs/binary_input.md @@ -1,4 +1,4 @@ -# Support for Binary Input Data {#ovms_docs_binary_input} +# Support for Binary Encoded Image Input Data {#ovms_docs_binary_input} @sphinxdirective @@ -20,4 +20,4 @@ automatically from JPEG/PNG to OpenVINO friendly format using built-in [OpenCV]( - [TensorFlow Serving API](./binary_input_tfs.md) - [KServe API](./binary_input_kfs.md) - It's worth noting that with KServe API, you can also send raw data (that does not require processing by OpenCV) in binary form via REST. This makes KServe API more performant choice while working with REST interface. The guide linked above explains how to work with both regular data in binary format as well as JPEG/PNG encoded images. \ No newline at end of file +It's worth noting that with the KServe API, you can also send raw data with or without image encoding via REST API. This makes the KServe REST API a more performant choice compared to the JSON format in the TFS API. The guide linked above explains how to work with both regular data in binary format as well as JPEG/PNG encoded images.
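As a rough illustration of the raw-data path mentioned above, the sketch below uses the Triton client library over REST; the address, model name, and tensor names are placeholders for illustration, not values defined by this documentation:

```python
import numpy as np
import tritonclient.http as httpclient

# Placeholder address and names - adjust them to your own deployment.
client = httpclient.InferenceServerClient(url="localhost:8000")

data = np.zeros((1, 3, 224, 224), dtype=np.float32)  # raw tensor data, no JPEG/PNG encoding
infer_input = httpclient.InferInput("input_name", list(data.shape), "FP32")
# binary_data=True sends the buffer after the JSON header instead of serializing it
# as JSON numbers, which is the more performant path described above.
infer_input.set_data_from_numpy(data, binary_data=True)

results = client.infer("model_name", [infer_input])
```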
\ No newline at end of file diff --git a/docs/binary_input_kfs.md b/docs/binary_input_kfs.md index 3295aad342..f15c153d05 100644 --- a/docs/binary_input_kfs.md +++ b/docs/binary_input_kfs.md @@ -4,21 +4,27 @@ KServe API allows sending the model input data in a variety of formats inside the [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data-1) objects or in `raw_input_contents` field of [ModelInferRequest](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1). -When the data is sent in the `bytes_contents` field of `InferTensorContents` and input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. The `BYTES` datatype is dedicated to binary encoded **images** and if it's set, the data **must** be placed in `bytes_contents` or in `raw_input_contents` if batch size is equal to 1. +When the data is sent to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions and the input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. Data of such inputs **must** be placed in `bytes_contents` or in `raw_input_contents`. + +If data is located in `raw_input_contents`, you need to precede the data of every image in the batch with 4 bytes (little endian) containing the size of that image. For example, if the batch contained three images of sizes 370, 480 and 500 bytes, the content of raw_input_contents[index_of_the_input] would look like this: +<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image> Note, that while the model metadata reports the inputs shape with layout `NHWC`, the binary data must be sent with shape: `[N]` with datatype: `BYTES`. Where `N` represents number of images converted to string bytes. -When sending data in the array format, the shape and datatype gives information on how to interpret bytes in the contents. For binary encoded data, the only information given by the `shape` field is the amount of images in the batch. On the server side, the bytes in each element of the `bytes_contents` field are loaded, resized to match model input shape and converted to the OpenVINO-friendly array format by OpenCV. +When sending data in the array format, the shape and datatype give information on how to interpret bytes in the contents. For binary encoded data, the only information given by the `shape` field is the amount of images in the batch. On the server side, the bytes of every image in the batch are loaded, resized to match the model input shape and converted to the OpenVINO-friendly array format by OpenCV. ## HTTP ### JPEG / PNG encoded images -KServe API also allows sending binary encoded data via HTTP interface. The tensor binary data is provided in the request body, after JSON object. While the JSON part contains information required to route the data to the target model and run inference properly, the data itself, in the binary format is placed right after the JSON. +KServe API also allows sending encoded images via the HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Similar to GRPC, for such input the `datatype` needs to be `BYTES`. The tensor binary data is provided in the request body, after the JSON object.
While the JSON part contains information required to route the data to the target model and run inference properly, the data itself, in the binary format, is placed right after the JSON. Therefore, you need to precede the data of every image with 4 bytes (little endian) containing the size of this image and specify their combined size in the `binary_data_size` parameter. + +For binary inputs, the `parameters` map in the JSON part contains a `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there are no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. To send a batch of images, you need to precede the data of every image with 4 bytes (little endian) containing its size and specify their combined size in `binary_data_size`. For example, if the batch contained three images of sizes 370, 480 and 500 bytes, the content of the input buffer inside the binary extension would look like this: +<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image> +In that case binary_data_size would be 1350 (370 + 480 + 500). +The set_data_from_numpy function in the Triton client library, which we use in our [REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py), automatically converts given images to this format. -For binary inputs, the `parameters` map in the JSON part contains `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there's no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. Therefore, to send a batch of different images, specify their sizes in `binary_data_size` field as a list with sizes of all images in the batch. -The list must be formed as a string, so for example, for 3 images in the batch, you may pass - `"9821,12302,7889"`. If the request contains only one input `binary_data_size` parameter can be omitted - in this case whole buffer is treated as a input image. For HTTP request headers, `Inference-Header-Content-Length` header must be provided to give the length of the JSON object, and `Content-Length` continues to give the full body length (as HTTP requires).
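For readers assembling the request body by hand rather than through a client library, the following minimal Python sketch shows how each image can be prefixed with its 4-byte little-endian size using `struct.pack`. The file names and the helper function are purely illustrative; as noted above, client libraries such as the Triton client's `set_data_from_numpy` do this framing for you.

```python
import struct

def pack_images(image_paths):
    """Concatenate images, preceding each one with its 4-byte little-endian size,
    as described in the text above."""
    body = b""
    total_image_bytes = 0
    for path in image_paths:
        with open(path, "rb") as f:
            data = f.read()
        body += struct.pack("<I", len(data)) + data  # little-endian length prefix, then the image bytes
        total_image_bytes += len(data)
    return body, total_image_bytes

# Hypothetical file names; total_image_bytes corresponds to the combined size
# from the example above (370 + 480 + 500 = 1350).
body, total_image_bytes = pack_images(["first.jpeg", "second.jpeg", "third.jpeg"])
```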
See an extended example with the request headers, and multiple images in the batch: @@ -42,7 +48,7 @@ For the Raw Data binary inputs `binary_data_size` parameter can be omitted since ## Usage examples -Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py)) +Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/grpc_infer_binary_resnet.py)) Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/README.md)) diff --git a/docs/binary_input_tfs.md b/docs/binary_input_tfs.md index c2475e93e5..9d565e415f 100644 --- a/docs/binary_input_tfs.md +++ b/docs/binary_input_tfs.md @@ -5,7 +5,7 @@ TensorFlow Serving API allows sending the model input data in a variety of formats inside the [TensorProto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) objects. Array data is passed inside the `tensor_content` field, which represents the input data buffer. -When the data is sent in the `string_val` field, such input is interpreted as a binary encoded image. +When the data is sent in the `string_val` field to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions, such input is interpreted as a binary encoded image. Note, that while the model metadata reports the inputs shape with layout NHWC, the binary data must be sent with shape: [N] with dtype: DT_STRING. Where N represents number of images converted to string bytes. @@ -14,7 +14,7 @@ When sending data in the array format, all bytes are in the same sequence in `te ## HTTP -TensorFlow Serving API also allows sending binary encoded data via HTTP interface. The binary data needs to be Base64 encoded and put into `inputs` or `instances` field as a map in form: +TensorFlow Serving API also allows sending encoded images via the HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. The binary data needs to be Base64 encoded and put into the `inputs` or `instances` field as a map in the form: ``` : {"b64":} ``` diff --git a/docs/build_from_source.md b/docs/build_from_source.md index 340aa47ef2..97dbd021e5 100644 --- a/docs/build_from_source.md +++ b/docs/build_from_source.md @@ -5,7 +5,7 @@ This document gives information how to build docker images and the binary packag ## Prerequisites 1. [Docker Engine](https://docs.docker.com/engine/) -1. Ubuntu 20.04 or RedHat 8.7 host +1. Ubuntu 20.04, Ubuntu 22.04 or RedHat 8.7 host 1. make 1. bash @@ -14,8 +14,8 @@ This document gives information how to build docker images and the binary packag Makefile located in root directory of this repository contains all targets needed to build docker images and binary packages.
It contains `docker_build` target which by default builds multiple docker images: -- `openvino/model_server:latest` - smallest release image containing only neccessary files to run model server on CPU, NCS and HDDL -- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU +- `openvino/model_server:latest` - smallest release image containing only necessary files to run model server on CPU +- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU and CPU - `openvino/model_server:latest-nginx-mtls` - release image containing examplary NGINX MTLS configuration - `openvino/model_server-build:latest` - image with builder environment containing all the tools to build OVMS @@ -58,6 +58,16 @@ Select base OS: make docker_build BASE_OS=redhat ``` +### `BASE_OS_TAG_UBUNTU` + +Select the Ubuntu base image version: +- `20.04` ubuntu:20.04 (default value) +- `22.04` ubuntu:22.04 + +```bash +make docker_build BASE_OS_TAG_UBUNTU=22.04 +``` +
Example: @@ -67,8 +77,9 @@ Example: Parameter used to control which GPU driver version will be installed. Supported versions: | OS | Versions | |---|---| -| Ubuntu | 22.35.24055 (default), 22.10.22597, 21.48.21782, 20.35.17767 | -| RedHat | 22.28.23726 (default), 22.10.22597, 21.38.21026, 20.35.17767 | +| Ubuntu22 | 23.13.26032 (default), 22.35.24055, 22.10.22597, 21.48.21782 | +| Ubuntu20 | 22.43.24595 (default), 22.35.24055, 22.10.22597, 21.48.21782 | +| RedHat | 22.43.24595 (default), 22.28.23726, 22.10.22597, 21.38.21026 | Additionally it is possible to specify custom (pre-production) drivers by providing location to NEO Runtime packages on local disk. Contact Intel representative to get the access to the pre-production drivers. Warning: _Maintained only for Ubuntu base OS._ @@ -111,23 +122,64 @@ Use together with `OV_CONTRIB_BRANCH` to specify which branch from [OpenVINO con Example: ```bash -make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3 -``` -```bash -docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA +make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master ``` +Note: In order to build the image with Red Hat UBI 8.7 as the base OS, it is required to use a host with a Red Hat subscription and entitlements in `/etc/pki/entitlement` and `/etc/rhsm`. +That is required to install several building dependencies. +
### `OV_USE_BINARY` By default set to `1`. When set to `0`, OpenVINO will be built from sources and `DLDT_PACKAGE_URL` will be omitted. -Use `OV_SOURCE_BRANCH` to select [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch. By default `master` will be used. -Warning: _Maintained only for Ubuntu base OS._ +Use `OV_SOURCE_BRANCH` and `OV_SOURCE_ORG` to select the [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch and fork. By default, the `master` branch and the `openvinotoolkit` org will be used. + +Example: +```bash +make docker_build OV_USE_BINARY=0 OV_SOURCE_BRANCH= OV_SOURCE_ORG= +``` + +### `RUN_TESTS` + +Enables or disables unit test execution as part of the docker image building. +- `0` Unit tests are skipped +- `1` Unit tests are executed (default) + +```bash +make docker_build RUN_TESTS=0 +``` + +Running the unit tests will make the build last longer and consume a bit more RAM. + +### `CHECK_COVERAGE` + +Enables or disables calculating the unit test coverage as part of the docker image building. +- `0` Checking the coverage is skipped +- `1` Checking the coverage is included + +```bash +make docker_build CHECK_COVERAGE=1 +``` + +Running the unit tests will increase build time and consume more RAM. + +### `JOBS` + +Number of compilation jobs. By default it is set to the number of CPU cores. On hosts with low RAM, this value can be reduced to avoid out of memory errors during the compilation. + +```bash +make docker_build JOBS=2 +``` +
+ +### `MEDIAPIPE_DISABLE` + +When set to `0`, OpenVINO&trade; Model Server will be built with [MediaPipe](mediapipe.md) support. Default value: `0`. Example: ```bash -make docker_build OV_USE_BINARY=0 OV_SOURCE_BRANCH= +make docker_build MEDIAPIPE_DISABLE=0 ``` - + Read more detailed usage in [developer guide](https://github.com/openvinotoolkit/model_server/blob/develop/docs/developer_guide.md). diff --git a/docs/clients_kfs.md b/docs/clients_kfs.md index 2161ac2344..0c1e20c211 100644 --- a/docs/clients_kfs.md +++ b/docs/clients_kfs.md @@ -331,50 +331,54 @@ When creating a Python-based client application, you can use Triton client libra .. code-block:: python - from tritonclient.grpc import service_pb2, service_pb2_grpc - from tritonclient.utils import * + import numpy as np + import tritonclient.grpc as grpclient - client = grpcclient.InferenceServerClient("localhost:9000") + triton_client = grpclient.InferenceServerClient( + url="address", + ssl=False, + verbose=False) + image_data = [] with open("image_path", 'rb') as f: image_data.append(f.read()) inputs = [] - inputs.append(service_pb2.ModelInferRequest().InferInputTensor()) - inputs[0].name = args['input_name'] - inputs[0].datatype = "BYTES" - inputs[0].shape.extend([1]) - inputs[0].contents.bytes_contents.append(image_data[0]) + inputs.append(grpclient.InferInput('input_name', [1], "BYTES")) + nmpy = np.array(image_data, dtype=np.object_) + inputs[0].set_data_from_numpy(nmpy) outputs = [] - outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor()) - outputs[0].name = "output_name" + outputs.append(grpclient.InferRequestedOutput("output_name")) - request = service_pb2.ModelInferRequest() - request.model_name = "model_name'" - request.inputs.extend(inputs) - request.outputs.extend(outputs) - response = grpc_stub.ModelInfer(request) + results = triton_client.infer(model_name="model_name", + inputs=inputs, + outputs=outputs) .. tab:: python [REST] .. code-block:: python - import requests - import json - - url = f"http://{address}/v2/models/{model_name}/infer" - http_session = requests.session() + import numpy as np + import tritonclient.http as httpclient + triton_client = httpclient.InferenceServerClient( + url="address", + ssl=False, + ssl_options=None, + verbose=False) + image_data = [] - image_binary_size = [] with open("image_path", 'rb') as f: image_data.append(f.read()) - image_binary_size.append(len(image_data[-1])) - image_binary_size_str = ",".join(map(str, image_binary_size)) - inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]} - inference_header_binary = json.dumps(inference_header).encode() + inputs = [] + inputs.append(httpclient.InferInput('input_name', [1], "BYTES")) + nmpy = np.array(image_data, dtype=np.object_) + inputs[0].set_data_from_numpy(nmpy) - results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))}) + outputs = [] + outputs.append(httpclient.InferRequestedOutput("output_name")) + + results = triton_client.infer(model_name="model_name", + inputs=inputs, + outputs=outputs) ..
tab:: cpp [GRPC] @@ -387,29 +391,26 @@ When creating a Python-based client application, you can use Triton client libra std::unique_ptr client; tc::InferenceServerGrpcClient::Create(&client, "localhost:9000"); - std::vector shape{1, 10}; + std::vector shape{1}; tc::InferInput* input; - tc::InferInput::Create(&input, "input_name", shape, "FP32"); + tc::InferInput::Create(&input, "input_name", shape, "BYTES"); std::shared_ptr input_ptr; input_ptr.reset(input) - std::ifstream fileImg("image_path", std::ios::binary); - fileImg.seekg(0, std::ios::end); - int bufferLength = fileImg.tellg(); - fileImg.seekg(0, std::ios::beg); - - char* buffer = new char[bufferLength]; - fileImg.read(buffer, bufferLength); + std::ifstream file(fileName, std::ios::binary); + file.unsetf(std::ios::skipws); + std::streampos fileSize; - std::vector input_data = std::vector(buffer, buffer + bufferLength); - input_ptr->AppendRaw(input_data); + file.seekg(0, std::ios::end); + fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); + input_ptr->AppendFromString({oss.str()}); tc::InferOptions options("model_name"); tc::InferResult* result; client->Infer(&result, options, inputs); - input->Reset(); - - delete buffer; } .. tab:: cpp [REST] @@ -425,27 +426,24 @@ When creating a Python-based client application, you can use Triton client libra std::vector shape{1}; tc::InferInput* input; - tc::InferInput::Create(&input, input_name, shape, "BYTES"); + tc::InferInput::Create(&input, "input_name", shape, "BYTES"); std::shared_ptr input_ptr; input_ptr.reset(input) - std::ifstream fileImg("image_path", std::ios::binary); - fileImg.seekg(0, std::ios::end); - int bufferLength = fileImg.tellg(); - fileImg.seekg(0, std::ios::beg); - - char* buffer = new char[bufferLength]; - fileImg.read(buffer, bufferLength); + std::ifstream file(fileName, std::ios::binary); + file.unsetf(std::ios::skipws); + std::streampos fileSize; - std::vector input_data = std::vector(buffer, buffer + bufferLength); - input_ptr->AppendRaw(input_data); + file.seekg(0, std::ios::end); + fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + std::ostringstream oss; + oss << file.rdbuf(); + input_ptr->AppendFromString({oss.str()}); tc::InferOptions options("model_name"); tc::InferResult* result; client->Infer(&result, options, inputs); - input->Reset(); - - delete buffer; } .. 
tab:: java @@ -470,8 +468,10 @@ When creating a Python-based client application, you can use Triton client libra input.addShape(1); FileInputStream imageStream = new FileInputStream("image_path"); - request.clearRawInputContents(); - request.addRawInputContents(ByteString.readFrom(imageStream)); + InferTensorContents.Builder input_data = InferTensorContents.newBuilder(); + input_data.addBytesContents(ByteString.readFrom(imageStream)); + input.setContents(input_data); + request.addInputs(0, input); ModelInferResponse response = grpc_stub.modelInfer(request.build()); @@ -488,20 +488,24 @@ When creating a Python-based client application, you can use Triton client libra ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{ - &grpc_client.ModelInferRequest_InferInputTensor{ - Name: "0", - Datatype: "BYTES", - Shape: []int64{1}, - }, + bytes, err := ioutil.ReadFile(fileName) + contents := grpc_client.InferTensorContents{} + contents.BytesContents = append(contents.BytesContents, bytes) + + inferInput := grpc_client.ModelInferRequest_InferInputTensor{ + Name: "0", + Datatype: "BYTES", + Shape: []int64{1}, + Contents: &contents, } - bytes, err := ioutil.ReadFile(fileName) - modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes) + inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{ + &inferInput, + } modelInferRequest := grpc_client.ModelInferRequest{ - ModelName: "model_name", - ModelVersion: "model_version", + ModelName: modelName, + ModelVersion: modelVersion, Inputs: inferInputs, } @@ -514,6 +518,9 @@ When creating a Python-based client application, you can use Triton client libra echo -n '{"inputs” : [{"name" : "0", "shape" : [1], "datatype" : "BYTES"}]}' > request.json stat --format=%s request.json 66 + printf "%x\n" `stat -c "%s" ./image.jpeg` + 1c21 + echo -n -e '\x21\x1c\x00\x00' >> request.json cat ./image.jpeg >> request.json curl --data-binary "@./request.json" -X POST http://localhost:8000/v2/models/resnet/versions/0/infer -H "Inference-Header-Content-Length: 66" @@ -546,7 +553,7 @@ When creating a Python-based client application, you can use Triton client libra data = np.array([1.0, 2.0, ..., 1000.0]) infer_input = httpclient.InferInput("input_name", data.shape, "FP32") infer_input.set_data_from_numpy(data) - results = client.infer("model_name", [infer_input] + results = client.infer("model_name", [infer_input]) .. tab:: cpp [GRPC] @@ -563,7 +570,7 @@ When creating a Python-based client application, you can use Triton client libra tc::InferInput* input; tc::InferInput::Create(&input, "input_name", shape, "FP32"); std::shared_ptr input_ptr; - input_ptr.reset(input) + input_ptr.reset(input); std::vector input_data(10); for (size_t i = 0; i < 10; ++i) { @@ -592,7 +599,7 @@ When creating a Python-based client application, you can use Triton client libra tc::InferInput* input; tc::InferInput::Create(&input, "input_name", shape, "FP32"); std::shared_ptr input_ptr; - input_ptr.reset(input) + input_ptr.reset(input); std::vector input_data(10); for (size_t i = 0; i < 10; ++i) { @@ -684,4 +691,98 @@ When creating a Python-based client application, you can use Triton client libra @endsphinxdirective +### Request Prediction on a string + +@sphinxdirective +.. tab:: python [GRPC] + + .. 
code-block:: python + + import numpy as np + import tritonclient.grpc as grpcclient + + client = grpcclient.InferenceServerClient("localhost:9000") + data = "" + input = np.array([data.encode('utf-8')], dtype=np.object_) + infer_input = grpcclient.InferInput("input_name", [1], "BYTES") + infer_input.set_data_from_numpy(input) + results = client.infer("model_name", [infer_input]) + +.. tab:: python [REST] + + .. code-block:: python + + import numpy as np + import tritonclient.http as httpclient + + client = httpclient.InferenceServerClient("localhost:9000") + data = "" + input = np.array([data.encode('utf-8')], dtype=np.object_) + infer_input = httpclient.InferInput("input_name", [1], "BYTES") + infer_input.set_data_from_numpy(input) + results = client.infer("model_name", [infer_input]) + +.. tab:: cpp [GRPC] + + .. code-block:: cpp + + #include "grpc_client.h" + + namespace tc = triton::client; + + int main(int argc, char** argv) { + std::unique_ptr client; + tc::InferenceServerGrpcClient::Create(&client, "localhost:9000"); + + tc::InferInput* input; + tc::InferInput::Create(&input, "input_name", {1}, "BYTES"); + std::shared_ptr input_ptr; + input_ptr.reset(input); + input_ptr->AppendFromString({std::string("")}); + std::vector inputs = {input_ptr.get()}; + + tc::InferOptions options("model_name"); + tc::InferResult* results; + client->Infer(&results, options, inputs); + std::shared_ptr results_ptr; + results_ptr.reset(results); + return 0; + } + +.. tab:: cpp [REST] + + .. code-block:: cpp + + #include "http_client.h" + + namespace tc = triton::client; + + int main(int argc, char** argv) { + std::unique_ptr client; + tc::InferenceServerHttpClient::Create(&client, "localhost:9000"); + + tc::InferInput* input; + tc::InferInput::Create(&input, "input_name", {1}, "BYTES"); + std::shared_ptr input_ptr; + input_ptr.reset(input); + input_ptr->AppendFromString({std::string("")}); + std::vector inputs = {input_ptr.get()}; + + tc::InferOptions options("model_name"); + tc::InferResult* results; + client->Infer(&results, options, inputs); + std::shared_ptr results_ptr; + results_ptr.reset(results); + return 0; + } + +.. tab:: curl + + .. code-block:: sh + + curl -X POST http://localhost:9000/v2/models/model_name/infer + -H 'Content-Type: application/json' + -d '{"inputs" : [ {"name" : "input_name", "shape" : [ 1 ], "datatype" : "BYTES", "data" : [""]} ]}' + +@endsphinxdirective For complete usage examples see [Kserve samples](https://github.com/openvinotoolkit/model_server/tree/develop/client/python/kserve-api/samples). diff --git a/docs/clients_tfs.md b/docs/clients_tfs.md index e65b78b8bd..4e1b07c57a 100644 --- a/docs/clients_tfs.md +++ b/docs/clients_tfs.md @@ -285,6 +285,60 @@ When creating a Python-based client application, there are two packages on PyPi @endsphinxdirective +### Request Prediction on a string + +@sphinxdirective + +.. tab:: ovmsclient [GRPC] + + .. code-block:: python + + from ovmsclient import make_grpc_client + + client = make_grpc_client("localhost:9000") + data = [""] + inputs = {"input_name": data} + results = client.predict(inputs=inputs, model_name="my_model") + +.. tab:: ovmsclient [REST] + + .. code-block:: python + + from ovmsclient import make_http_client + + client = make_http_client("localhost:8000") + + data = [""] + inputs = {"input_name": data} + results = client.predict(inputs=inputs, model_name="my_model") + +.. tab:: tensorflow-serving-api + + .. 
code-block:: python + + import grpc + from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2 + from tensorflow import make_tensor_proto + + channel = grpc.insecure_channel("localhost:9000") + prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + + data = [""] + predict_request = predict_pb2.PredictRequest() + predict_request.model_spec.name = "my_model" + predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data)) + predict_response = prediction_service_stub.Predict(predict_request, 1) + results = predict_response.outputs["output_name"] + +.. tab:: curl + + .. code-block:: sh + + curl -X POST http://localhost:8000/v1/models/my_model:predict + -H 'Content-Type: application/json' + -d '{"instances": [{"input_name": ""}]}' + +@endsphinxdirective For complete usage examples see [ovmsclient samples](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/client/python/ovmsclient/samples). @sphinxdirective diff --git a/docs/custom_node_development.md b/docs/custom_node_development.md index aba0516bd6..cb7798c531 100644 --- a/docs/custom_node_development.md +++ b/docs/custom_node_development.md @@ -123,6 +123,27 @@ Just add include statement like: #include "opencv2/core.hpp" ``` +## String support +There are special considerations when handling, in custom nodes, inputs sent by the clients as strings. Such data, when received by the OVMS frontend, is automatically converted to a 2D array with shape [-1,-1]. An example of a custom node using this feature is our [Tokenizer](https://github.com/openvinotoolkit/model_server/tree/develop/src/custom_nodes/tokenizer). + +### inputs +When strings are sent to a custom node input that has a 2-dimensional shape and U8 precision, OVMS, after receiving a request containing such inputs, converts them to a 2-dimensional U8 array of shape [number of strings, length of the longest string + 1] with padding filled with zeros. For example, a batch of three strings ["String_123", "", "zebra"] would be converted to: +``` +['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123 +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // "" +'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra" +``` + +### outputs + +When the name of a custom node output is suffixed with _string, its shape has 2 dimensions and its precision is U8, OVMS treats the data of such an output as an array that contains a string in every row, with padding filled with zeros, and automatically converts the data of such outputs to strings. For example, the U8 array: +``` +['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123 +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // "" +'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra" +``` +would be converted to ["String_123", "", "zebra"]. + ## Building Custom node library can be compiled using any tool. It is recommended to follow the example based diff --git a/docs/deploying_server.md b/docs/deploying_server.md index e52ca2e180..fd60c0560f 100644 --- a/docs/deploying_server.md +++ b/docs/deploying_server.md @@ -13,7 +13,7 @@ This is a step-by-step guide on how to deploy OpenVINO™ Model Server on Li - [Docker Engine](https://docs.docker.com/engine/) installed - Intel® Core™ processor (6-13th gen.) or Intel® Xeon® processor (1st to 4th gen.) - Linux, macOS or Windows via [WSL](https://docs.microsoft.com/en-us/windows/wsl/) -- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_Supported_Devices.html). Accelerators are tested only on bare-metal Linux hosts.
+- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Working_with_devices.html). Accelerators are tested only on bare-metal Linux hosts. ### Launch Model Server Container @@ -74,37 +74,95 @@ If everything is set up correctly, you will see 'zebra' prediction in the output ## Deploying Model Server on Baremetal (without container) It is possible to deploy Model Server outside of container. -To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20 or RHEL8. -Find latest binary package in [release](https://github.com/openvinotoolkit/model_server/releases) page. -Alternatively it is possible to build package from source: +To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20, Ubuntu22 or RHEL8. -```bash -git clone https://github.com/openvinotoolkit/model_server -cd model_server -make docker_build -``` +@sphinxdirective -The `ovms.tar.gz` package will appear in `dist/ubuntu` or `dist/redhat` directory. -Unpack the package: +.. tab:: Ubuntu 20.04 -```bash -tar -xzvf dist/ubuntu/ovms.tar.gz -``` + Download precompiled package: + + .. code-block:: sh -Install required libraries depending on the OS. -For Ubuntu 20.04: -```bash -apt update -y && apt install -y libpugixml1v5 libtbb2 -``` -For RedHat 8.7: -```bash -microdnf install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm -``` + wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu20.tar.gz + + or build it yourself: + + .. code-block:: sh + + # Clone the model server repository + git clone https://github.com/openvinotoolkit/model_server + cd model_server + # Build docker images (the binary is one of the artifacts) + make docker_build + # Unpack the package + tar -xzvf dist/ubuntu/ovms.tar.gz + + Install required libraries: + + .. code-block:: sh + + sudo apt update -y && apt install -y libpugixml1v5 libtbb2 + +.. tab:: Ubuntu 22.04 + + Download precompiled package: + + .. code-block:: sh + + wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu22.tar.gz + + or build it yourself: + + .. code-block:: sh + + # Clone the model server repository + git clone https://github.com/openvinotoolkit/model_server + cd model_server + # Build docker images (the binary is one of the artifacts) + make docker_build BASE_OS_TAG_UBUNTU=22.04 + # Unpack the package + tar -xzvf dist/ubuntu/ovms.tar.gz + + Install required libraries: + + .. code-block:: sh + + sudo apt update -y && apt install -y libpugixml1v5 + +.. tab:: RHEL 8.7 + + Download precompiled package: + + .. code-block:: sh + + wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_redhat.tar.gz + + or build it yourself: + + .. code-block:: sh + + # Clone the model server repository + git clone https://github.com/openvinotoolkit/model_server + cd model_server + # Build docker images (the binary is one of the artifacts) + make docker_build BASE_OS=redhat + # Unpack the package + tar -xzvf dist/redhat/ovms.tar.gz + + Install required libraries: + + .. 
code-block:: sh + + sudo dnf install -y pkg-config && sudo rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm + +@endsphinxdirective Start the server: ```bash wget https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.{xml,bin} -P models/resnet50/1 + ./ovms/bin/ovms --model_name resnet --model_path models/resnet50 ``` @@ -115,7 +173,7 @@ Learn more about model server [starting parameters](parameters.md). > **NOTE**: > When serving models on [AI accelerators](accelerators.md), some additional steps may be required to install device drivers and dependencies. -> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/latest/openvino_docs_install_guides_configurations_header.html) documentation. +> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_configurations_header.html) documentation. ## Deploying Model Server in Kubernetes diff --git a/docs/dynamic_shape_dynamic_model.md b/docs/dynamic_shape_dynamic_model.md index aae42a41be..8b1c71e5a3 100644 --- a/docs/dynamic_shape_dynamic_model.md +++ b/docs/dynamic_shape_dynamic_model.md @@ -8,7 +8,7 @@ Enable dynamic shape by setting the `shape` parameter to range or undefined: - `--shape "(1,3,200:500,200:500)"` when model is supposed to support height and width values in a range of 200-500. Note that any dimension can support range of values, height and width are only examples here. > Note that some models do not support dynamic dimensions. Learn more about supported model graph layers including all limitations -on [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html). +on [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html). Another option to use dynamic shape feature is to export the model with dynamic dimension using Model Optimizer. OpenVINO Model Server will inherit the dynamic shape and no additional settings are needed. diff --git a/docs/features.md b/docs/features.md index 22e392ae0e..af13d1dee6 100644 --- a/docs/features.md +++ b/docs/features.md @@ -8,6 +8,7 @@ ovms_docs_dag ovms_docs_binary_input + ovms_docs_text ovms_docs_model_version_policy ovms_docs_shape_batch_layout ovms_docs_online_config_changes @@ -16,6 +17,7 @@ ovms_docs_dynamic_input ovms_docs_c_api ovms_docs_advanced + ovms_docs_mediapipe @endsphinxdirective diff --git a/docs/mediapipe.md b/docs/mediapipe.md new file mode 100644 index 0000000000..7d7772824b --- /dev/null +++ b/docs/mediapipe.md @@ -0,0 +1,194 @@ +# Integration with MediaPipe (preview) {#ovms_docs_mediapipe} + +@sphinxdirective + +.. toctree:: + :maxdepth: 1 + :hidden: + + ovms_docs_mediapipe_calculators + +@endsphinxdirective + +## Introduction +MediaPipe is an open-source framework for building pipelines that perform inference over arbitrary sensory data. Using MediaPipe in OVMS enables users to define a powerful graph from many ready-made calculators/nodes that come with MediaPipe and support the features needed for running a stable graph, such as a flow limiter node. Users can also run the graph on a server or inside the application host.
Here can be found more information about [MediaPipe framework ](https://developers.google.com/mediapipe/framework/framework_concepts/overview) + +This guide gives information about: + +* How to build OVMS with MediaPipe support +* OVMS Calculators +* Graph proto files +* Configuration files +* Using the mediapipe graphs +* Graphs examples +* Current Limitations + +## How to build OVMS with mediapipe support +Building OVMS with mediapipe support requires passing additional flag for make command, for example: + +``` +MEDIAPIPE_DISABLE=0 make docker_build +``` + +More information about OVMS build parameters can be found here [here](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md). + +## Node Types + +"Each calculator is a node of a graph. The bulk of graph execution happens inside its calculators. OVMS has its own calculators but can also use newly developed calculators or reuse the existing calculators defined in the original mediapipe repository." + +For more details you can visit mediapipe concept description - [Calculators Concept Page](https://developers.google.com/mediapipe/framework/framework_concepts/calculators) or OVMS specific calculators implementation - [Ovms Calculators Concept Page](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/src/mediapipe_calculators/calculators.md) + +## Graph proto files + +Graph proto files are used to define a graph. Example content of proto file with graph containing ModelAPICalculator nodes: + +``` + +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:dummy" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:add" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:dummy" + input_stream: "DUMMY_IN:in1" + output_stream: "DUMMY_OUT:dummy_output" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "DUMMY_IN" + value: "b" + } + tag_to_output_tensor_names { + key: "DUMMY_OUT" + value: "a" + } + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:add" + input_stream: "ADD_INPUT1:dummy_output" + input_stream: "ADD_INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "ADD_INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "ADD_INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} + +``` + + +Here can be found more information about [MediaPipe graphs proto](https://developers.google.com/mediapipe/framework/framework_concepts/graphs) + +## Configuration files +MediaPipe graph configuration is to be placed in the same json file like the +[models config file](starting_server.md). +While models are defined in section `model_config_list`, graphs are configured in +the `mediapipe_config_list` section. 
+ +Basic graph section template is depicted below: + +``` + +{ + "model_config_list": [...], + "mediapipe_config_list": [ + { + "name":"mediaDummy", + "base_path":"/mediapipe/graphs/", + "graph_path":"graphdummyadapterfull.pbtxt", + "subconfig":"subconfig_dummy.json" + } + ] +} + +``` + +Basic subconfig: + +``` + +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/models/dummy", + "shape": "(1, 10)" + } + } + ] +} + + +``` +Nodes in the MediaPipe graphs can reference both the models configured in the model_config_list section and the models in the subconfig. + +### MediaPipe configuration options explained + +|Option|Type|Description|Required| +|:---|:---|:---|:---| +|`"name"`|string|Graph identifier related to name field specified in gRPC/REST request|Yes| +|`"base_path"`|string|Path to which the graph definition and subconfig file paths are relative. May be absolute or relative to the main config path. Default value is "(main config path)\"|No| +|`"graph_path"`|string|Path to the graph proto file. May be absolute or relative to the base_path. Default value is "(base_path)\graph.pbtxt". The file has to exist.|No| +|`"subconfig"`|string|Path to the subconfig file. May be absolute or relative to the base_path. Default value is "(base_path)\subconfig.json". A missing file does not result in an error.|No| + +The subconfig file may only contain the *model_config_list* section - in the same format as in the [models config file](starting_server.md). + +## Using MediaPipe + +MediaPipe graphs can use the same KServe Inference API as the models. There are exactly the same calls for running +the predictions. The request format must match the pipeline definition inputs. + +Graphs can be queried for their state using the calls [GetModelStatus](model_server_grpc_api_kfs.md) +and [REST Model Status](model_server_rest_api_kfs.md). + +## MediaPipe Graphs Examples + +[Image classification](../demos/mediapipe/image_classification/README.md) + +[Multi model](../demos/mediapipe/multi_model_graph/README.md) + +## Current limitations +- It is a preview version of the MediaPipe integration, which means it is not ready to be used in production and only some of the OVMS features are supported for MediaPipe graphs. + +- MediaPipe graphs are supported only for the GRPC KFS API. The only TFS calls supported are get model status and config reload. + +- Binary inputs are not supported for MediaPipe graphs. + +- Public images do not include the MediaPipe feature. + +- Making changes in the subconfig file does not trigger config reloads. Main config changes are monitored and trigger a subconfig reload even if the subconfig wasn't changed. diff --git a/docs/model_cache.md b/docs/model_cache.md index 00d493a175..d409f948a3 100644 --- a/docs/model_cache.md +++ b/docs/model_cache.md @@ -1,7 +1,7 @@ # Model Cache {#ovms_docs_model_cache} ## Overview -The Model Server can leverage a [OpenVINO™ model cache functionality](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_Model_caching_overview.html), to speed up subsequent model loading on a target device. +The Model Server can leverage the [OpenVINO™ model cache functionality](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html) to speed up subsequent model loading on a target device. The cached files make the Model Server initialization usually faster. The boost depends on a model and a target device. The most noticable improvement will be observed with GPU devices.
On other devices, like CPU, it is possible to observe no speed up effect or even slower loading process depending on used model. Test the setup before final deployment.
@@ -42,8 +42,6 @@ In case there are valid reasons to enable the model cache also for models with a > IMPORTANT: Models imported via the custom loaders never create or use any cache.
-> IMPORTANT: Model cache can't be used with HDDL devices.
-
## Use case example ### Prepare model
diff --git a/docs/model_server_c_api.md b/docs/model_server_c_api.md
index b62e0411d6..eca9d5b4bc 100644
--- a/docs/model_server_c_api.md
+++ b/docs/model_server_c_api.md
@@ -11,6 +11,12 @@ Server functionalities are encapsulated in shared library built from OVMS source Calling a method to start the model serving in your application initiates the OVMS as a separate thread. Then you can schedule inference both directly from app using C API and gRPC/HTTP endpoints.
+The API is versioned according to [SemVer 2.0](https://semver.org/). By calling `OVMS_ApiVersion` it is possible to get the `major` and `minor` version numbers.
+- major - incremented when new, backward incompatible changes are introduced to the API itself (API call removal, name change, parameter change)
+- minor - incremented when the API is modified in a backward compatible way (new API call added)
+
+There is no patch version number. Underlying functionality changes not related to the API itself are tracked via the OVMS version. OVMS and OpenVINO versions can be checked in the logs or via the `ServerMetadata` request (KServe API).
+
### Server configuration and start To start OVMS you need to create `OVMS_Server` object using `OVMS_ServerNew`, with set of `OVMS_ServerSettings` and `OVMS_ModelsSettings` that describe how the server should be configured. Once the server is started using `OVMS_ServerStartFromConfigurationFile` you can schedule the inferences using `OVMS_Inference`. To stop server, you must call `OVMS_ServerDelete`. While the server is alive you can schedule both in process inferences as well as use gRPC API to schedule inferences from remote machine. Optionally you can also enable HTTP service. Example how to use OVMS with C/C++ application is [here](../demos/c_api_minimal_app/README.md).
diff --git a/docs/model_server_grpc_api_kfs.md b/docs/model_server_grpc_api_kfs.md
index bcd140dca8..378307e0f0 100644
--- a/docs/model_server_grpc_api_kfs.md
+++ b/docs/model_server_grpc_api_kfs.md
@@ -45,9 +45,9 @@ Run inference with requested model or [DAG](./dag_scheduler.md). Check KServe documentation for more [details](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
-> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) and [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for BF16 data type and there is no support for using FP16 in `InferTensorContents`. In case of sending raw images jpeg files BYTES data type should be used and data should be put in `InferTensorContents`'s `bytes_contents` or `raw_input_contents` for batch size equal to 1.
+> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) or [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for BF16 data type and there is no support for using FP16 in `InferTensorContents`. When sending image files or strings, the BYTES data type should be used and the data should be put in `InferTensorContents`'s `bytes_contents` or `raw_input_contents`.
-Also, using `BYTES` datatype it is possible to send binary encoded images that would be preprocessed by OVMS using opencv and converted to OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
+Also, using the `BYTES` datatype it is possible to send binary encoded images to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. The images are preprocessed by OVMS using OpenCV and converted to an OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
## See Also
diff --git a/docs/model_server_rest_api_kfs.md b/docs/model_server_rest_api_kfs.md
index 1d5f7b9792..d54c817c45 100644
--- a/docs/model_server_rest_api_kfs.md
+++ b/docs/model_server_rest_api_kfs.md
@@ -225,9 +225,11 @@ $request_output = } ```
-Besides numerical values, it is possible to pass binary inputs using Binary Data extension:
+> Note: In `tensor_data`, elements may be presented in their multi-dimensional representation or as a flattened one-dimensional representation. Before inference execution the tensor data is flattened, and only the element count in `tensor_data` is validated.
-As a JPEG / PNG encoded images - in this case binary encoded data is loaded by OVMS using OpenCV which then converts it to OpenVINO-friendly data format for inference. For encoded inputs datatype `BYTES` is reserved.
+Besides numerical values, it is possible to pass encoded images using the Binary Data extension:
+
+As JPEG / PNG encoded images - in this case the binary encoded data is loaded by OVMS using OpenCV, which then converts it to an OpenVINO-friendly data format for inference. Input is treated as an encoded image when the datatype is `BYTES` and the model or pipeline has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Every batch of the BYTES input needs to be preceded by 4 bytes, little endian, that contain its size.
```JSON Content-Type: application/octet-stream
@@ -243,7 +245,7 @@ Content-Length: } ] }
-<9472 bytes of data for model_input tensor>
+<0x00250000 (9472 as four bytes little endian)><9472 bytes of data for model_input tensor>
```
@@ -267,6 +269,8 @@ Content-Length: <3240000 bytes of the whole data batch for model_input tensor> ```
+*Sending strings inside the binary extension also requires preceding every batch with 4 bytes, little endian, that contain its size.
+
Check [how binary data is handled in OpenVINO Model Server](./binary_input.md) for more informations.
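+
+A minimal Python sketch of how a client could assemble such a length-prefixed payload is shown below. It is an illustration only: the input name `model_input`, the file `image.jpg` and the single-element batch are hypothetical, and the surrounding request framing (JSON header, `binary_data_size` parameter, HTTP headers) follows the Binary Data extension referenced above.
+
+```python
+import json
+import struct
+
+
+def pack_bytes_input(items):
+    """Prefix every batch element with its 4-byte, little-endian size."""
+    payload = b""
+    for item in items:
+        payload += struct.pack("<I", len(item)) + item  # 4 bytes of length, then the data
+    return payload
+
+
+# Hypothetical single-element batch: one JPEG file.
+with open("image.jpg", "rb") as f:
+    binary_payload = pack_bytes_input([f.read()])
+
+# JSON part of the request; binary_data_size tells the server how many bytes
+# of the binary payload placed after the JSON belong to this input.
+inference_header = json.dumps({
+    "inputs": [{
+        "name": "model_input",
+        "shape": [1],
+        "datatype": "BYTES",
+        "parameters": {"binary_data_size": len(binary_payload)},
+    }]
+})
+
+request_body = inference_header.encode() + binary_payload
+```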
diff --git a/docs/models_repository.md b/docs/models_repository.md index 26ee01c7a7..06fe9ea4de 100644 --- a/docs/models_repository.md +++ b/docs/models_repository.md @@ -1,19 +1,17 @@ # Preparing a Model Repository {#ovms_docs_models_repository} The AI models served by OpenVINO™ Model Server must be in either of the four formats: -- [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files +- [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files - [ONNX](https://onnx.ai/), using the .onnx file - [PaddlePaddle](https://www.paddlepaddle.org.cn/en), using .pdiparams and .pdmodel files -- [TensorFlow](https://www.tensorflow.org/), using frozen graph format with .pb extension (preview feature) +- [TensorFlow](https://www.tensorflow.org/), using SavedModel, MetaGraph or frozen Protobuf formats. To use models trained in other formats you need to convert them first. To do so, use -OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different +OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different [converters](https://onnx.ai/supported-tools.html) for ONNX. -The feature of direct import of Tensorflow models is currently a preview feature. Currently it supports only the frozen graph and not all topologies can be used that way. -For unsupported models you can use the Model Optimizer to convert the model to IR format. - The models need to be placed and mounted in a particular directory structure and according to the following rules: +When the models are hosted on the cloud storage, they should be frozen to be imported successfully. ``` tree models/ @@ -37,9 +35,15 @@ models/ │ └── 1 │ ├── model.pdiparams │ └── model.pdmodel -└── model5 +├── model5 +│ └── 1 +│ ├── model.pdiparams +│ └── model.pdmodel +└── model6 └── 1 - └── TF_fronzen_model.pb + ├── variables + └── saved_model.pb + ``` - Each model should be stored in a dedicated directory, e.g. model1 and model2. diff --git a/docs/ovms.png b/docs/ovms.png deleted file mode 100644 index 2e51f20616..0000000000 Binary files a/docs/ovms.png and /dev/null differ diff --git a/docs/ovms_quickstart.md b/docs/ovms_quickstart.md index 5d3abace70..70705d069a 100644 --- a/docs/ovms_quickstart.md +++ b/docs/ovms_quickstart.md @@ -1,12 +1,12 @@ # Quickstart Guide {#ovms_docs_quick_start_guide} -OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets) -, [ONNX](https://onnx.ai/), PaddlePaddle[https://github.com/PaddlePaddle/Paddle] or TensorFlow format [https://www.tensorflow.org/]. You can get them by: +OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets) +, [ONNX](https://onnx.ai/), [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) or [TensorFlow](https://www.tensorflow.org/) format. 
You can get them by: - downloading models from [Open Model Zoo](https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/) -- converting other formats using [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) +- converting other formats using [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) -This guide uses a [face detection model](https://docs.openvino.ai/latest/omz_models_model_face_detection_retail_0004.html) in IR format. +This guide uses a [face detection model](https://docs.openvino.ai/2023.0/omz_models_model_face_detection_retail_0004.html) in IR format. To quickly start using OpenVINO™ Model Server follow these steps: 1. Prepare Docker diff --git a/docs/parameters.md b/docs/parameters.md index 0bfc3545bf..3bff8303f7 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -6,14 +6,14 @@ | Option | Value format | Description | |---|---|---| | `"model_name"/"name"` | `string` | Model name exposed over gRPC and REST API.(use `model_name` in command line, `name` in json config) | -| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows:
`"/opt/ml/models/model"`
`"gs://bucket/models/model"`
`"s3://bucket/models/model"`
`"azure://bucket/models/model"`
(use `model_path` in command line, `base_path` in json config) | -| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model.Some models don't support the reshape operation.If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors.Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html). | +| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows:
`"/opt/ml/models/model"`
`"gs://bucket/models/model"`
`"s3://bucket/models/model"`
`"azure://bucket/models/model"`
The path can also be relative to the config.json location<br>
(use `model_path` in command line, `base_path` in json config) | +| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model.Some models don't support the reshape operation.If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors.Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html). | | `"batch_size"` | `integer/"auto"` | Optional. By default, the batch size is derived from the model, defined through the OpenVINO Model Optimizer. `batch_size` is useful for sequential inference requests of the same batch size.Some models, such as object detection, don't work correctly with the `batch_size` parameter. With these models, the output's first dimension doesn't represent the batch size. You can set the batch size for these models by using network reshaping and setting the `shape` parameter appropriately.The default option of using the Model Optimizer to determine the batch size uses the size of the first dimension in the first input for the size. For example, if the input shape is `(1, 3, 225, 225)`, the batch size is set to `1`. If you set `batch_size` to a numerical value, the model batch size is changed when the service starts.`batch_size` also accepts a value of `auto`. If you use `auto`, then the served model batch size is set according to the incoming data at run time. The model is reloaded each time the input data changes the batch size. You might see a delayed response upon the first request. | | `"layout" `| `json/string` | `layout` is optional argument which allows to define or change the layout of model input and output tensors. To change the layout (add the transposition step), specify `:`. Example: `NHWC:NCHW` means that user will send input data in `NHWC` layout while the model is in `NCHW` layout.

When specified without colon separator, it doesn't add a transposition but can determine the batch dimension. E.g. `--layout CN` makes prediction service treat second dimension as batch size.

When the model has multiple inputs or the output layout has to be changed, use a json format. Set the mapping, such as: `{"input1":"NHWC:NCHW","input2":"HWN:NHW","output1":"CN:NC"}`.

If not specified, layout is inherited from model.

[Read more](shape_batch_size_and_layout.md#changing-model-inputoutput-layout) | | `"model_version_policy"` | `json/string` | Optional. The model version policy lets you decide which versions of a model that the OpenVINO Model Server is to serve. By default, the server serves the latest version. One reason to use this argument is to control the server memory consumption.The accepted format is in json or string. Examples:
`{"latest": { "num_versions":2 }`
`{"specific": { "versions":[1, 3] } }`
`{"all": {} }` | | `"plugin_config"` | `json/string` | List of device plugin parameters. For full list refer to [OpenVINO documentation](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html) and [performance tuning guide](./performance_tuning.md). Example:
`{"PERFORMANCE_HINT": "LATENCY"}` | | `"nireq"` | `integer` | The size of internal request queue. When set to 0 or no value is set value is calculated automatically based on available resources.| -| `"target_device"` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"HDDL"/"GPU"/"MYRIAD"/"MULTI"/"HETERO"` | +| `"target_device"` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"GPU"/"MULTI"/"HETERO"` | | `"stateful"` | `bool` | If set to true, model is loaded as stateful. | | `"idle_sequence_cleanup"` | `bool` | If set to true, model will be subject to periodic sequence cleaner scans. See [idle sequence cleanup](stateful_models.md). | | `"max_sequence_number"` | `uint32` | Determines how many sequences can be handled concurrently by a model instance. | @@ -21,6 +21,14 @@ | `"metrics_enable"` | `bool` | Flag enabling [metrics](https://docs.openvino.ai/2022.3/ovms_docs_metrics.html) endpoint on rest_port. | | `"metrics_list"` | `string` | Comma separated list of [metrics](https://docs.openvino.ai/2022.3/ovms_docs_metrics.html). If unset, only default metrics will be enabled.| + + +> **Note** : Specifying config_path is mutually exclusive with putting model parameters in the CLI ([serving multiple models](./starting_server.md)). + +| Option | Value format | Description | +|---|---|---| +| `config_path` | `string` | Absolute path to json configuration file | + ## Server configuration options Configuration options for the server are defined only via command-line options and determine configuration common for all served models. @@ -36,9 +44,12 @@ Configuration options for the server are defined only via command-line options a | `file_system_poll_wait_seconds` | `integer` | Time interval between config and model versions changes detection in seconds. Default value is 1. Zero value disables changes monitoring. | | `sequence_cleaner_poll_wait_minutes` | `integer` | Time interval (in minutes) between next sequence cleaner scans. Sequences of the models that are subjects to idle sequence cleanup that have been inactive since the last scan are removed. Zero value disables sequence cleaner. See [idle sequence cleanup](stateful_models.md). | | `custom_node_resources_cleaner_interval_seconds` | `integer` | Time interval (in seconds) between two consecutive resources cleanup scans. Default is 1. Must be greater than 0. See [custom node development](custom_node_development.md). | -| `cpu_extension` | `string` | Optional path to a library with [custom layers implementation](https://docs.openvino.ai/2022.2/openvino_docs_Extensibility_UG_Intro.html). | +| `cpu_extension` | `string` | Optional path to a library with [custom layers implementation](https://docs.openvino.ai/2023.0/openvino_docs_Extensibility_UG_Intro.html). | | `log_level` | `"DEBUG"/"INFO"/"ERROR"` | Serving logging level | | `log_path` | `string` | Optional path to the log file. | | `cache_dir` | `string` | Path to the model cache storage. Caching will be enabled if this parameter is defined or the default path /opt/cache exists | +| `grpc_channel_arguments` | `string` | A comma separated list of arguments to be passed to the grpc server. (e.g. 
grpc.max_connection_age_ms=2000) |
+| `help` | `NA` | Shows the help message and exits |
+| `version` | `NA` | Shows the binary version |
diff --git a/docs/performance_tuning.md b/docs/performance_tuning.md
index f7665dfbf9..928b480bbb 100644
--- a/docs/performance_tuning.md
+++ b/docs/performance_tuning.md
@@ -47,7 +47,7 @@ GPU #### LATENCY This mode prioritizes low latency, providing short response time for each inference job. It performs best for tasks where inference is required for a single input image, like a medical analysis of an ultrasound scan image. It also fits the tasks of real-time or nearly real-time applications, such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles.
-Note that currently the `PERFORMANCE_HINT` property is supported by CPU and GPU devices only. [More information](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_AUTO.html#performance-hints).
+Note that currently the `PERFORMANCE_HINT` property is supported by CPU and GPU devices only. [More information](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Performance_Hints.html#performance-hints-how-it-works).
To enable Performance Hints for your application, use the following command:
@@ -78,16 +78,17 @@ OpenVINO™ Model Server can be tuned to a single client use case or a high execution streams. They split the available resources to perform parallel execution of multiple requests. It is particularly efficient for models which cannot effectively consume all CPU cores or for CPUs with high number of cores.
-By default, number of streams is calculated based on number of available CPUs. It gives a compromise between the single client scenario and the high concurrency.
-If this default configuration is not suitable, adjust it with the `NUM_STREAMS` parameter defined as part
-of the device plugin configuration.
+By default, the number of streams is optimized for execution with minimal latency at low concurrency. The number of execution streams will be equal to the number of CPU sockets or GPU cards.
+If that default configuration is not suitable, adjust it with the `NUM_STREAMS` parameter defined as part
+of the device plugin configuration or set the performance hint to `THROUGHPUT`.
-In a scenario where the number of parallel connections is close to 1, set the following parameter:
+In a scenario with a single connection/client, set the following parameter:
`--plugin_config '{"NUM_STREAMS": "1"}'`
-When the number of concurrent requests is higher, increase the number of streams. Make sure, however, that the number of streams is lower than the average volume of concurrent inference operations. Otherwise, the server might not be fully utilized.
-Number of streams should not exceed the number of CPU cores.
+When the number of concurrent requests is high, increase the number of streams. Make sure, however, that the number of streams is lower than the average volume of concurrent inference operations. Otherwise, the server might not be fully utilized.
+
+The number of streams should not exceed the number of cores.
For example, with ~50 clients sending the requests to the server with 48 cores, set the number of streams to 24: @@ -120,7 +121,7 @@ In case of using CPU plugin to run the inference, it might be also beneficial to | NUM_STREAMS | Specifies number of execution streams for the throughput mode | -> **NOTE:** For additional information about all parameters read [OpenVINO supported plugins](https://docs.openvino.ai/latest/groupov_runtime_cpp_prop_api.html?#detailed-documentation). +> **NOTE:** For additional information about all parameters read about [OpenVINO device properties](https://docs.openvino.ai/2023.0/groupov_runtime_cpp_prop_api.html?#detailed-documentation). - Example: Following docker command will set `NUM_STREAMS` parameter to a value `1`: @@ -135,7 +136,7 @@ docker run --rm -d --cpuset-cpus 0,1,2,3 -v ${PWD}/models/public/resnet-50-tf:/o > **NOTE:** Deployment of the OpenVINO Model Server including the autoscaling capability can be automated in Kubernetes and OpenShift using the operator. [Read more about](https://github.com/openvinotoolkit/operator/blob/main/docs/autoscaling.md) ## CPU Power Management Settings -To save power, the OS can decrease the CPU frequency and increase a volatility of the latency values. Similarly the Intel® Turbo Boost Technology may also affect the stability of results. For best reproducibility, consider locking the frequency to the processor base frequency (refer to the https://ark.intel.com/ for your specific CPU). For example, in Linux setting the relevant values for the /sys/devices/system/cpu/cpu* entries does the trick. [Read more](https://docs.openvino.ai/2022.2/openvino_docs_optimization_guide_dldt_optimization_guide.html). High-level commands like cpupower also exists: +To save power, the OS can decrease the CPU frequency and increase a volatility of the latency values. Similarly the Intel® Turbo Boost Technology may also affect the stability of results. For best reproducibility, consider locking the frequency to the processor base frequency (refer to the https://ark.intel.com/ for your specific CPU). For example, in Linux setting the relevant values for the /sys/devices/system/cpu/cpu* entries does the trick. High-level commands like cpupower also exists: ``` $ cpupower frequency-set --min 3.1GHz ``` @@ -163,7 +164,7 @@ The default value is 1 second which ensures prompt response to creating new mode Depending on the device employed to run the inference operation, you can tune the execution behavior with a set of parameters. Each device is handled by its OpenVINO plugin. -> **NOTE**: For additional information, read [supported configuration parameters for all plugins](https://docs.openvino.ai/latest/groupov_runtime_cpp_prop_api.html?#detailed-documentation). +> **NOTE**: For additional information, read [supported configuration parameters for all plugins](https://docs.openvino.ai/2023.0/groupov_runtime_cpp_prop_api.html?#detailed-documentation). Model's plugin configuration is a dictionary of param:value pairs passed to OpenVINO Plugin on network load. It can be set with `plugin_config` parameter. @@ -178,11 +179,11 @@ docker run --rm -d -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 ## Analyzing performance issues Recommended steps to investigate achievable performance and discover bottlenecks: -1. [Launch OV benchmark app](https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html?highlight=benchmark) +1. 
[Launch OV benchmark app](https://docs.openvino.ai/2023.0/openvino_inference_engine_tools_benchmark_tool_README.html?highlight=benchmark) **Note:** It is useful to drop plugin configuration from benchmark app using `-dump_config` and then use the same plugin configuration in model loaded into OVMS **Note:** When launching benchmark app use `-inference_only=false`. Otherwise OV avoids setting input tensor of inference each time which is not comparable flow to OVMS.
-2. [Launch OVMS benchmark client](https://docs.openvino.ai/latest/ovms_demo_benchmark_client.html) on the same machine as OVMS
-3. [Launch OVMS benchmark client](https://docs.openvino.ai/latest/ovms_demo_benchmark_client.html) from remote machine
+2. [Launch OVMS benchmark client](https://docs.openvino.ai/2023.0/ovms_demo_benchmark_client.html) on the same machine as OVMS
+3. [Launch OVMS benchmark client](https://docs.openvino.ai/2023.0/ovms_demo_benchmark_client.html) from remote machine
4. Measure achievable network bandwidth with tools such as [iperf](https://github.com/esnet/iperf)
diff --git a/docs/security_considerations.md b/docs/security_considerations.md
index 72b8d2dd5e..2424aa979c 100644
--- a/docs/security_considerations.md
+++ b/docs/security_considerations.md
@@ -18,5 +18,5 @@ OpenVINO Model Server currently does not provide access restrictions and traffic See also: - [Securing OVMS with NGINX](../extras/nginx-mtls-auth/README.md)
-- [Securing models with OVSA](https://docs.openvino.ai/2022.2/ovsa_get_started.html)
+- [Securing models with OVSA](https://docs.openvino.ai/2023.0/ovsa_get_started.html)
diff --git a/docs/shape_batch_size_and_layout.md b/docs/shape_batch_size_and_layout.md
index 614a267156..90ad6351a2 100644
--- a/docs/shape_batch_size_and_layout.md
+++ b/docs/shape_batch_size_and_layout.md
@@ -28,7 +28,7 @@ it ignores the batch_size value. - JSON object e.g. `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)"}` - it defines a shape of every included input in the model *Note:* Some models do not support the reshape operation. Learn more about supported model graph layers including all limitations
-on [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html).
+on [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html).
In case the model can't be reshaped, it will remain in the original parameters and all requests with incompatible input format will get an error. The model server will also report such problems in the logs.
diff --git a/docs/starting_server.md b/docs/starting_server.md
index d76850d45e..1c23eb6bee 100644
--- a/docs/starting_server.md
+++ b/docs/starting_server.md
@@ -120,9 +120,8 @@ To serve multiple models from the same container you will need an additional JSO "config":{ "name":"model_name5", "base_path":"s3://bucket/models/model5",
- "shape": "auto",
"nireq": 32,
- "target_device": "HDDL"
+ "target_device": "GPU"
} } ] ``` Once the Docker container has the path to your config file mounted, it can be started. This simplifies the `docker run` command, as arguments are now read from the config file.
+When the `base_path` in the config.json does not start with a cloud URI prefix or a `/` character, the path will be relative to the config file location.
+This is helpful when models are distributed together with the config file, as the paths do not need to be adjusted.
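+
+For illustration, below is a short Python sketch that generates such a config file with a relative `base_path`; the directory layout and model name are hypothetical, the point is only the relative path resolution described above.
+
+```python
+import json
+import os
+
+# Hypothetical workspace distributed together with the models:
+# workspace/config.json and workspace/models/model_name5/1/<model files>
+config = {
+    "model_config_list": [
+        {
+            "config": {
+                "name": "model_name5",
+                # No leading "/" and no cloud URI prefix, so the path is
+                # resolved relative to the location of config.json.
+                "base_path": "models/model_name5"
+            }
+        }
+    ]
+}
+
+os.makedirs("workspace", exist_ok=True)
+with open(os.path.join("workspace", "config.json"), "w") as f:
+    json.dump(config, f, indent=4)
+```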
## Next Steps diff --git a/docs/stateful_models.md b/docs/stateful_models.md index b8cb2c7538..3523f2177c 100644 --- a/docs/stateful_models.md +++ b/docs/stateful_models.md @@ -71,7 +71,7 @@ docker run -d -u $(id -u):$(id -g) -v $(pwd)/rm_lstm4f:/models/stateful_model -v | `stateful` | `bool` | If set to true, model is loaded as stateful. | false | | `idle_sequence_cleanup` | `bool` | If set to true, model will be subject to periodic sequence cleaner scans.
See [idle sequence cleanup](#stateful_cleanup). | true |
| `max_sequence_number` | `uint32` | Determines how many sequences can be handled concurrently by a model instance. | 500 |
-| `low_latency_transformation` | `bool` | If set to true, model server will apply [low latency transformation](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_network_state_intro.html#lowlatency_transformation) on model load. | false |
+| `low_latency_transformation` | `bool` | If set to true, model server will apply [low latency transformation](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_model_state_intro.html#lowlatency-transformations) on model load. | false |
**Note:** Setting `idle_sequence_cleanup`, `max_sequence_number` and `low_latency_transformation` require setting `stateful` to true.
@@ -309,7 +309,7 @@ If set to `true` sequence cleaner will check that model. Otherwise, sequence cle There are limitations for using stateful models with OVMS: - Support inference execution only using CPU as the target device.
-  - Support Kaldi models with memory layers and non-Kaldi models with Tensor Iterator. See this [docs about stateful networks](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_network_state_intro.html) to learn about stateful networks representation in OpenVINO.
+  - Support Kaldi models with memory layers and non-Kaldi models with Tensor Iterator. See the [docs about stateful networks](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_model_state_intro.html) to learn about the stateful networks representation in OpenVINO.
- [Auto batch size and shape](shape_batch_size_and_layout.md) are **not** available in stateful models. - Stateful model instances **cannot** be used in [DAGs](dag_scheduler.md). - Requests ordering is guaranteed only when a single client sends subsequent requests in a synchronous manner. Concurrent interaction with the same sequence might negatively affect the accuracy of the results.
diff --git a/docs/text_handling.md b/docs/text_handling.md
new file mode 100644
index 0000000000..66699e62c6
--- /dev/null
+++ b/docs/text_handling.md
@@ -0,0 +1,40 @@
+# Support for text data format {#ovms_docs_text}
+
+OpenVINO Model Server can now greatly simplify writing applications with Natural Language Processing models. For the use cases related to text analysis or text generation, the client application can communicate with the model server using the original text format. There is no requirement to perform pre and post processing on the client side. Tokenization and detokenization can now be fully delegated to the server.
+
+We addressed both the situation when the original model requires tokens on input or output, and models with an embedded tokenization layer. Below are demonstrated use cases with a simple client application sending and receiving text in a string format. The whole complexity of the text conversion is fully delegated to the remote serving endpoint.
+
+
+## DAG pipeline to delegate tokenization to the server
+When the model is using tokens on input or output, you can create a DAG pipeline which includes custom nodes performing pre and post processing.
+OpenVINO Model Server can accept the text data format on the gRPC and REST API interface and deserializes it to a 2D array of bytes, where each row represents a single, null terminated sentence, padded with `\0` to the length of the longest sentence in the batch.
+
+Example of a batch of size 2 with the string inputs `abcd` and `ab`:
+```
+[
+    'a', 'b', 'c', 'd', 0,
+    'a', 'b', 0 , 0 , 0
+]
+```
+Such data in a tensor format can be passed to a custom node to perform preprocessing like string tokenization. The output of the preprocessing node can be passed to the model.
+There is a built-in [Tokenizer](https://github.com/openvinotoolkit/model_server/tree/develop/src/custom_nodes/tokenizer) custom node for that use case based on the Blingfire library.
+
+Similarly, a custom node can perform string detokenization and return a string to the model server client.
+
+Check the [end-to-end demonstration](../demos/gptj_causal_lm/python/README.md) of such a use case with GPT based text generation.
+
+The client API snippets with the string data format are included in [KServe API](./clients_kfs.md) and [TFS API](./clients_tfs.md).
+
+
+## Custom CPU extension for tokenization layer in the model
+
+Some AI model training frameworks support layers accepting the string format on the input or output. They include layers performing the tokenization operations inside the neural network.
+While OpenVINO doesn't natively support the string data type, it is possible to extend its capabilities with a CPU extension.
+We included in the model server a built-in extension for the [SentencepieceTokenizer](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/custom_operations) layer from TensorFlow.
+The extension is capable of converting a 1D U8 OpenVINO tensor into the appropriate format for [SentencepieceTokenizer]. OVMS is able to detect such a layer and create the 1D U8 tensor out of KServe/TensorflowServing API strings automatically.
+
+A demonstration of such a use case is the MUSE model, which can be imported directly by the model server. The client can send the text data without any preprocessing and take advantage of much faster execution time.
+Check the [MUSE demo](../demos/universal-sentence-encoder/README.md).
+
+
+
diff --git a/docs/tf_model_binary_input.md b/docs/tf_model_binary_input.md
index 47d3f88905..eb3b8b66ae 100644
--- a/docs/tf_model_binary_input.md
+++ b/docs/tf_model_binary_input.md
@@ -4,7 +4,7 @@ This guide shows how to convert TensorFlow models and deploy them with the OpenV - In this example TensorFlow model [ResNet](https://github.com/tensorflow/models/tree/v2.2.0/official/r1/resnet) will be used.
-- TensorFlow model can be converted into Intermediate Representation format using model_optimizer tool. There are several formats for storing TensorFlow model. In this guide, we present conversion from SavedModel format. More information about conversion process can be found on the [model optimizer documentation](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html#savedmodel_format).
+- A TensorFlow model can be converted into the Intermediate Representation format using the model_optimizer tool. There are several formats for storing a TensorFlow model. In this guide, we present conversion from the SavedModel format. More information about the conversion process can be found in the [model optimizer guide](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_prepare_model_convert_model_tutorials.html).
- Binary input format has several requirements for the model and ovms configuration. More information can be found in [binary inputs documentation](binary_input.md).
## Steps @@ -29,10 +29,10 @@ docker run -u $(id -u):$(id -g) -v ${PWD}/resnet_v2/:/resnet openvino/ubuntu20_d *Note:* Some models might require other parameters such as `--scale` parameter. - `--reverse_input_channels` - required for models that are trained with images in RGB order. -- `--mean_values` , `--scale` - should be provided if input pre-processing operations are not a part of topology- and the pre-processing relies on the application providing input data. They can be determined in several ways described in [conversion parameters guide](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html). In this example [model pre-processing script](https://github.com/tensorflow/models/blob/v2.2.0/official/r1/resnet/imagenet_preprocessing.py) was used to determine them. +- `--mean_values` , `--scale` - should be provided if input pre-processing operations are not a part of topology- and the pre-processing relies on the application providing input data. They can be determined in several ways described in [conversion parameters guide](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html). In this example [model pre-processing script](https://github.com/tensorflow/models/blob/v2.2.0/official/r1/resnet/imagenet_preprocessing.py) was used to determine them. -*Note:* You can find out more about [TensorFlow Model conversion into Intermediate Representation](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html) if your model is stored in other formats. +*Note:* You can find out more about [TensorFlow Model conversion into Intermediate Representation](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html) if your model is stored in other formats. This operation will create model files in `${PWD}/resnet_v2/models/resnet/1/` folder. ```bash diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 9376f5715f..506d94817b 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -102,4 +102,3 @@ HTTP_proxy - Cache folder (by default `/opt/cache` or defined by `--cache_dir`) should be mounted into docker container with read-write access. Unless changed by the docker run command, the model server has a security context of ovms account with uid 5000. - The biggest speedup in the model loading time is expected for GPU device. For CPU device the gain will depend on the model topology. In some rare cases, it is possible the load time will not be improved noticeably or it might be even slightly slower. -- Currently using model cache is not supported with HDDL target device. Do not enable model cache while using HDDL cards. diff --git a/external/BUILD b/external/BUILD new file mode 100644 index 0000000000..0f9b9c8882 --- /dev/null +++ b/external/BUILD @@ -0,0 +1,50 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# The tf.patch "tf logging macros" part is changing the logging.h file because it has macro redefinition conflicts +# with mediapipe logging.h on the following macros and functions connected with them. +# We are adding glog/logging.h include and the github glog dependency in the tensorflow project +# so that it uses the same macros from common dependency and we remove the conflicting macros from original file. + +# define LOG(severity) _TF_LOG_##severit +# define VLOG_IS_ON(lvl) +# define VLOG(level) +# define DVLOG(verbose_level) +# define LOG_EVERY_N(severity, n) +# define LOG_FIRST_N(severity, n) +# define CHECK(condition) +# define CHECK_OP_LOG(name, op, val1, val2) +# define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2) +# define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2) +# define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2) +# define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2) +# define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2) +# define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2) +# define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2) + +# define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y) +# define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y) +# define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y) +# define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y) +# define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y) + +licenses(["notice"]) + +exports_files([ + "listen.patch", + "tf.patch", + "net_http.patch", +]) \ No newline at end of file diff --git a/external/mwaitpkg.patch b/external/mwaitpkg.patch new file mode 100644 index 0000000000..d4c2eb2999 --- /dev/null +++ b/external/mwaitpkg.patch @@ -0,0 +1,14 @@ +diff --git a/BUILD.bazel b/BUILD.bazel +index 7aecb3e0..5c81961e 100644 +--- a/BUILD.bazel ++++ b/BUILD.bazel +@@ -37,7 +37,7 @@ cc_library( + ]), + copts = ["-w"] + select({ + "@bazel_tools//platforms:windows": [""], +- "//conditions:default": ["-mwaitpkg"], ++ "//conditions:default": [""], + }), + defines = + select({ + diff --git a/external/tf.patch b/external/tf.patch index d144ed97ce..d833d4b4e5 100644 --- a/external/tf.patch +++ b/external/tf.patch @@ -37,3 +37,469 @@ index be571aaf1f8..a8765b08bd4 100644 ], ) +From c29c6030c78a635541c01bb394a337f0133aa2a0 Mon Sep 17 00:00:00 2001 +From: Rafal Sapala +Date: Fri, 5 May 2023 13:57:03 +0200 +Subject: [PATCH] tf logging macros + +--- + tensorflow/core/platform/default/BUILD | 13 ++ + tensorflow/core/platform/default/log_macros.h | 119 ++++++++++++ + tensorflow/core/platform/default/logging.cc | 4 - + tensorflow/core/platform/default/logging.h | 174 ++---------------- + 4 files changed, 144 insertions(+), 166 deletions(-) + create mode 100644 tensorflow/core/platform/default/log_macros.h + +diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD +index 0856bb1edce..18f6211e11e 100644 +--- a/tensorflow/core/platform/default/BUILD ++++ b/tensorflow/core/platform/default/BUILD +@@ -188,6 +188,18 @@ cc_library( + ], + ) + ++cc_library( ++ name = "log_macros", ++ hdrs = ["log_macros.h"], ++ visibility = ["//visibility:public"], ++) ++ ++cc_library( ++ name = "glog", ++ visibility = ["//visibility:public"], ++ deps = ["@com_github_glog_glog//:glog",], ++) ++ + cc_library( + name = "logging", + srcs = ["logging.cc"], +@@ -199,6 +211,7 @@ cc_library( + ], + textual_hdrs = ["logging.h"], + deps = [ ++ ":glog", + "//tensorflow/core/platform", + "//tensorflow/core/platform:env_time", + "//tensorflow/core/platform:macros", +diff --git 
a/tensorflow/core/platform/default/log_macros.h b/tensorflow/core/platform/default/log_macros.h +new file mode 100644 +index 00000000000..eee3803d12c +--- /dev/null ++++ b/tensorflow/core/platform/default/log_macros.h +@@ -0,0 +1,119 @@ ++#pragma once ++#define LOG(severity) _TF_LOG_##severity ++ ++// An instance of `LOG_EVERY_N` increments a hidden zero-initialized counter ++// every time execution passes through it and logs the specified message when ++// the counter's value is a multiple of `n`, doing nothing otherwise. Each ++// instance has its own counter. The counter's value can be logged by streaming ++// the symbol `COUNTER`. `LOG_EVERY_N` is thread-safe. ++// Example: ++// ++// for (const auto& user : all_users) { ++// LOG_EVERY_N(INFO, 1000) << "Processing user #" << COUNTER; ++// ProcessUser(user); ++// } ++#define LOG_EVERY_N(severity, n) \ ++ LOGGING_INTERNAL_STATEFUL_CONDITION(EveryN, true, n) \ ++ LOG(severity) ++ ++// CHECK dies with a fatal error if condition is not true. It is *not* ++// controlled by NDEBUG, so the check will be executed regardless of ++// compilation mode. Therefore, it is safe to do things like: ++// CHECK(fp->Write(x) == 4) ++#define CHECK(condition) \ ++ if (TF_PREDICT_FALSE(!(condition))) \ ++ LOG(FATAL) << "Check failed: " #condition " " ++// `LOG_FIRST_N` behaves like `LOG_EVERY_N` except that the specified message is ++// logged when the counter's value is less than `n`. `LOG_FIRST_N` is ++// thread-safe. ++#define LOG_FIRST_N(severity, n) \ ++ LOGGING_INTERNAL_STATEFUL_CONDITION(FirstN, true, n) \ ++ LOG(severity) ++ ++#ifdef IS_MOBILE_PLATFORM ++// Turn VLOG off when under mobile devices for considerations of binary size. ++#define VLOG_IS_ON(lvl) ((lvl) <= 0) ++ ++#else ++ ++// Otherwise, set TF_CPP_MAX_VLOG_LEVEL environment to update minimum log level ++// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual ++// translation units. ++#define VLOG_IS_ON(lvl) \ ++ (([](int level, const char* fname) { \ ++ static const bool vmodule_activated = \ ++ ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \ ++ return vmodule_activated; \ ++ })(lvl, __FILE__)) ++#endif ++ ++#define VLOG(level) \ ++ TF_PREDICT_TRUE(!VLOG_IS_ON(level)) \ ++ ? (void)0 \ ++ : ::tensorflow::internal::Voidifier() & \ ++ ::tensorflow::internal::LogMessage(__FILE__, __LINE__, \ ++ tensorflow::INFO) ++ ++// `DVLOG` behaves like `VLOG` in debug mode (i.e. `#ifndef NDEBUG`). ++// Otherwise, it compiles away and does nothing. ++#ifndef NDEBUG ++#define DVLOG VLOG ++#else ++#ifndef DVLOG ++#define DVLOG(verbose_level) \ ++ while (false && (verbose_level) > 0) ::tensorflow::internal::LogMessageNull() ++#endif ++#endif ++ ++ ++// In optimized mode, use CheckOpString to hint to compiler that ++// the while condition is unlikely. ++#define CHECK_OP_LOG(name, op, val1, val2) \ ++ while (::tensorflow::internal::CheckOpString _result{ \ ++ ::tensorflow::internal::name##Impl( \ ++ ::tensorflow::internal::GetReferenceableValue(val1), \ ++ ::tensorflow::internal::GetReferenceableValue(val2), \ ++ #val1 " " #op " " #val2)}) \ ++ ::tensorflow::internal::LogMessageFatal(__FILE__, __LINE__) << *(_result.str_) ++ ++#define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2) ++ ++// CHECK_EQ/NE/... 
++#define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2) ++#define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2) ++#define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2) ++#define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2) ++#define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2) ++#define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2) ++ ++ ++#ifndef NDEBUG ++// DCHECK_EQ/NE/... ++#define DCHECK(condition) CHECK(condition) ++#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) ++#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) ++#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) ++#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) ++#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) ++#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) ++ ++#else ++ ++#define DCHECK(condition) \ ++ while (false && (condition)) LOG(FATAL) ++ ++// NDEBUG is defined, so DCHECK_EQ(x, y) and so on do nothing. ++// However, we still want the compiler to parse x and y, because ++// we don't want to lose potentially useful errors and warnings. ++// _DCHECK_NOP is a helper, and should not be used outside of this file. ++#define _TF_DCHECK_NOP(x, y) \ ++ while (false && ((void)(x), (void)(y), 0)) LOG(FATAL) ++ ++#define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y) ++#define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y) ++#define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y) ++#define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y) ++#define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y) ++#define DCHECK_GT(x, y) _TF_DCHECK_NOP(x, y) ++ ++#endif +\ No newline at end of file +diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc +index df7734f8f35..8aa40fa6b08 100644 +--- a/tensorflow/core/platform/default/logging.cc ++++ b/tensorflow/core/platform/default/logging.cc +@@ -380,7 +380,6 @@ void LogString(const char* fname, int line, int severity, + LogMessage(fname, line, severity) << message; + } + +-template <> + void MakeCheckOpValueString(std::ostream* os, const char& v) { + if (v >= 32 && v <= 126) { + (*os) << "'" << v << "'"; +@@ -389,7 +388,6 @@ void MakeCheckOpValueString(std::ostream* os, const char& v) { + } + } + +-template <> + void MakeCheckOpValueString(std::ostream* os, const signed char& v) { + if (v >= 32 && v <= 126) { + (*os) << "'" << v << "'"; +@@ -398,7 +396,6 @@ void MakeCheckOpValueString(std::ostream* os, const signed char& v) { + } + } + +-template <> + void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) { + if (v >= 32 && v <= 126) { + (*os) << "'" << v << "'"; +@@ -408,7 +405,6 @@ void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) { + } + + #if LANG_CXX11 +-template <> + void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v) { + (*os) << "nullptr"; + } +diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h +index 27331b2bcd1..c6d56ce1955 100644 +--- a/tensorflow/core/platform/default/logging.h ++++ b/tensorflow/core/platform/default/logging.h +@@ -36,9 +36,18 @@ limitations under the License. + #include "tensorflow/core/platform/macros.h" + #include "tensorflow/core/platform/types.h" + ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wsign-compare" ++ ++#include "glog/logging.h" ++ ++#pragma GCC diagnostic pop ++ + // TODO(mrry): Prevent this Windows.h #define from leaking out of our headers. 
+ #undef ERROR + ++#define COMPACT_GOOGLE_LOG_QFATAL COMPACT_GOOGLE_LOG_ERROR ++ + namespace tensorflow { + const int INFO = 0; // base_logging::INFO; + const int WARNING = 1; // base_logging::WARNING; +@@ -114,43 +123,6 @@ class LogMessageNull : public std::basic_ostringstream { + + #define _TF_LOG_QFATAL _TF_LOG_FATAL + +-#define LOG(severity) _TF_LOG_##severity +- +-#ifdef IS_MOBILE_PLATFORM +- +-// Turn VLOG off when under mobile devices for considerations of binary size. +-#define VLOG_IS_ON(lvl) ((lvl) <= 0) +- +-#else +- +-// Otherwise, set TF_CPP_MAX_VLOG_LEVEL environment to update minimum log level +-// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual +-// translation units. +-#define VLOG_IS_ON(lvl) \ +- (([](int level, const char* fname) { \ +- static const bool vmodule_activated = \ +- ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \ +- return vmodule_activated; \ +- })(lvl, __FILE__)) +- +-#endif +- +-#define VLOG(level) \ +- TF_PREDICT_TRUE(!VLOG_IS_ON(level)) \ +- ? (void)0 \ +- : ::tensorflow::internal::Voidifier() & \ +- ::tensorflow::internal::LogMessage(__FILE__, __LINE__, \ +- tensorflow::INFO) +- +-// `DVLOG` behaves like `VLOG` in debug mode (i.e. `#ifndef NDEBUG`). +-// Otherwise, it compiles away and does nothing. +-#ifndef NDEBUG +-#define DVLOG VLOG +-#else +-#define DVLOG(verbose_level) \ +- while (false && (verbose_level) > 0) ::tensorflow::internal::LogMessageNull() +-#endif +- + class LogEveryNState { + public: + bool ShouldLog(int n); +@@ -217,26 +189,6 @@ class LogEveryNSecState { + logging_internal_stateful_condition_do_log; \ + logging_internal_stateful_condition_do_log = false) + +-// An instance of `LOG_EVERY_N` increments a hidden zero-initialized counter +-// every time execution passes through it and logs the specified message when +-// the counter's value is a multiple of `n`, doing nothing otherwise. Each +-// instance has its own counter. The counter's value can be logged by streaming +-// the symbol `COUNTER`. `LOG_EVERY_N` is thread-safe. +-// Example: +-// +-// for (const auto& user : all_users) { +-// LOG_EVERY_N(INFO, 1000) << "Processing user #" << COUNTER; +-// ProcessUser(user); +-// } +-#define LOG_EVERY_N(severity, n) \ +- LOGGING_INTERNAL_STATEFUL_CONDITION(EveryN, true, n) \ +- LOG(severity) +-// `LOG_FIRST_N` behaves like `LOG_EVERY_N` except that the specified message is +-// logged when the counter's value is less than `n`. `LOG_FIRST_N` is +-// thread-safe. +-#define LOG_FIRST_N(severity, n) \ +- LOGGING_INTERNAL_STATEFUL_CONDITION(FirstN, true, n) \ +- LOG(severity) + // `LOG_EVERY_POW_2` behaves like `LOG_EVERY_N` except that the specified + // message is logged when the counter's value is a power of 2. + // `LOG_EVERY_POW_2` is thread-safe. +@@ -254,13 +206,6 @@ class LogEveryNSecState { + LOGGING_INTERNAL_STATEFUL_CONDITION(EveryNSec, true, n_seconds) \ + LOG(severity) + +-// CHECK dies with a fatal error if condition is not true. It is *not* +-// controlled by NDEBUG, so the check will be executed regardless of +-// compilation mode. 
Therefore, it is safe to do things like: +-// CHECK(fp->Write(x) == 4) +-#define CHECK(condition) \ +- if (TF_PREDICT_FALSE(!(condition))) \ +- LOG(FATAL) << "Check failed: " #condition " " + + // Function is overloaded for integral types to allow static const + // integrals declared in classes and not defined to be used as arguments to +@@ -279,28 +224,6 @@ inline unsigned int GetReferenceableValue(unsigned int t) { return t; } + inline int64 GetReferenceableValue(int64 t) { return t; } + inline uint64 GetReferenceableValue(uint64 t) { return t; } + +-// This formats a value for a failing CHECK_XX statement. Ordinarily, +-// it uses the definition for operator<<, with a few special cases below. +-template +-inline void MakeCheckOpValueString(std::ostream* os, const T& v) { +- (*os) << v; +-} +- +-// Overrides for char types provide readable values for unprintable +-// characters. +-template <> +-void MakeCheckOpValueString(std::ostream* os, const char& v); +-template <> +-void MakeCheckOpValueString(std::ostream* os, const signed char& v); +-template <> +-void MakeCheckOpValueString(std::ostream* os, const unsigned char& v); +- +-#if LANG_CXX11 +-// We need an explicit specialization for std::nullptr_t. +-template <> +-void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v); +-#endif +- + // A container for a string pointer which can be evaluated to a bool - + // true iff the pointer is non-NULL. + struct CheckOpString { +@@ -311,17 +234,6 @@ struct CheckOpString { + string* str_; + }; + +-// Build the error message string. Specify no inlining for code size. +-template +-string* MakeCheckOpString(const T1& v1, const T2& v2, +- const char* exprtext) TF_ATTRIBUTE_NOINLINE; +- +-// A helper class for formatting "expr (V1 vs. V2)" in a CHECK_XX +-// statement. See MakeCheckOpString for sample usage. Other +-// approaches were considered: use of a template method (e.g., +-// base::BuildCheckOpString(exprtext, base::Print, &v1, +-// base::Print, &v2), however this approach has complications +-// related to volatile arguments and function-pointer arguments). + class CheckOpMessageBuilder { + public: + // Inserts "exprtext" and " (" to the stream. +@@ -339,14 +251,6 @@ class CheckOpMessageBuilder { + std::ostringstream* stream_; + }; + +-template +-string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { +- CheckOpMessageBuilder comb(exprtext); +- MakeCheckOpValueString(comb.ForVar1(), v1); +- MakeCheckOpValueString(comb.ForVar2(), v2); +- return comb.NewString(); +-} +- + // Helper functions for CHECK_OP macro. 
+ // The (int, int) specialization works around the issue that the compiler + // will not instantiate the template version of the function on values of +@@ -360,7 +264,7 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { + if (TF_PREDICT_TRUE(v1 op v2)) \ + return NULL; \ + else \ +- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \ ++ return google::MakeCheckOpString(v1, v2, exprtext); \ + } \ + inline string* name##Impl(int v1, int v2, const char* exprtext) { \ + return name##Impl(v1, v2, exprtext); \ +@@ -368,14 +272,14 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { + inline string* name##Impl(const size_t v1, const int v2, \ + const char* exprtext) { \ + if (TF_PREDICT_FALSE(v2 < 0)) { \ +- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \ ++ return google::MakeCheckOpString(v1, v2, exprtext); \ + } \ + return name##Impl(v1, v2, exprtext); \ + } \ + inline string* name##Impl(const int v1, const size_t v2, \ + const char* exprtext) { \ + if (TF_PREDICT_FALSE(v2 >= std::numeric_limits::max())) { \ +- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \ ++ return google::MakeCheckOpString(v1, v2, exprtext); \ + } \ + const size_t uval = (size_t)((unsigned)v2); \ + return name##Impl(v1, uval, exprtext); \ +@@ -394,60 +298,6 @@ TF_DEFINE_CHECK_OP_IMPL(Check_GE, >=) + TF_DEFINE_CHECK_OP_IMPL(Check_GT, >) + #undef TF_DEFINE_CHECK_OP_IMPL + +-// In optimized mode, use CheckOpString to hint to compiler that +-// the while condition is unlikely. +-#define CHECK_OP_LOG(name, op, val1, val2) \ +- while (::tensorflow::internal::CheckOpString _result{ \ +- ::tensorflow::internal::name##Impl( \ +- ::tensorflow::internal::GetReferenceableValue(val1), \ +- ::tensorflow::internal::GetReferenceableValue(val2), \ +- #val1 " " #op " " #val2)}) \ +- ::tensorflow::internal::LogMessageFatal(__FILE__, __LINE__) << *(_result.str_) +- +-#define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2) +- +-// CHECK_EQ/NE/... +-#define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2) +-#define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2) +-#define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2) +-#define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2) +-#define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2) +-#define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2) +-#define CHECK_NOTNULL(val) \ +- ::tensorflow::internal::CheckNotNull(__FILE__, __LINE__, \ +- "'" #val "' Must be non NULL", (val)) +- +-#ifndef NDEBUG +-// DCHECK_EQ/NE/... +-#define DCHECK(condition) CHECK(condition) +-#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) +-#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) +-#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) +-#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) +-#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) +-#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) +- +-#else +- +-#define DCHECK(condition) \ +- while (false && (condition)) LOG(FATAL) +- +-// NDEBUG is defined, so DCHECK_EQ(x, y) and so on do nothing. +-// However, we still want the compiler to parse x and y, because +-// we don't want to lose potentially useful errors and warnings. +-// _DCHECK_NOP is a helper, and should not be used outside of this file. 
+-#define _TF_DCHECK_NOP(x, y) \ +- while (false && ((void)(x), (void)(y), 0)) LOG(FATAL) +- +-#define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y) +-#define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y) +-#define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y) +-#define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y) +-#define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y) +-#define DCHECK_GT(x, y) _TF_DCHECK_NOP(x, y) +- +-#endif +- + // These are for when you don't want a CHECK failure to print a verbose + // stack trace. The implementation of CHECK* in this file already doesn't. + #define QCHECK(condition) CHECK(condition) +-- +2.25.1 + diff --git a/extras/nginx-mtls-auth/Dockerfile.redhat b/extras/nginx-mtls-auth/Dockerfile.redhat index f27a0ec64c..634f715600 100644 --- a/extras/nginx-mtls-auth/Dockerfile.redhat +++ b/extras/nginx-mtls-auth/Dockerfile.redhat @@ -21,7 +21,8 @@ RUN set -e ; \ set -x ; \ mkdir /certs ; \ unset LD_LIBRARY_PATH ; \ - microdnf install systemd wget findutils; \ + if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \ + $DNF_TOOL install -y systemd wget findutils; \ rpm -Uv http://nginx.org/packages/mainline/centos/8/x86_64/RPMS/nginx-1.21.6-1.el8.ngx.x86_64.rpm ; \ wget -O /usr/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 ; \ chmod +x /usr/bin/dumb-init ; \ diff --git a/package.json b/package.json new file mode 100644 index 0000000000..bbf7b45b51 --- /dev/null +++ b/package.json @@ -0,0 +1,8 @@ +{ + "name": "medipipe-dev", + "version": "0.0.0-alphga", + "description": "MediaPipe GitHub repo", + "devDependencies": { + "@bazel/typescript": "^5.7.1" + } +} diff --git a/release_files/Dockerfile.redhat b/release_files/Dockerfile.redhat index 2130922850..5a330c283d 100644 --- a/release_files/Dockerfile.redhat +++ b/release_files/Dockerfile.redhat @@ -14,8 +14,8 @@ # limitations under the License. 
# -ARG BASE_IMAGE=registry.access.redhat.com/ubi8/ubi:8.7 -FROM $BASE_IMAGE as base_build +ARG BASE_IMAGE=registry.access.redhat.com/ubi8/ubi-minimal:8.7 +FROM registry.access.redhat.com/ubi8/ubi:8.7 as base_build RUN yum install -y xz && yum clean all WORKDIR / COPY ovms.tar.xz / @@ -28,40 +28,29 @@ RUN mkdir /licenses && ln -s /ovms/LICENSE /licenses && ln -s /ovms/thirdparty-l RUN if [ -f /ovms/lib/libovms_shared.so ] ; then rm -rf /ovms/lib/libovms_shared.so ; else exit 0 ; fi ; # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # -FROM registry.access.redhat.com/ubi8/ubi-minimal:8.7 as release +FROM $BASE_IMAGE as release LABEL "name"="OVMS" LABEL "vendor"="Intel Corporation" -LABEL "version"="2022.3" -LABEL "release"="2022" +LABEL "version"="2023.0" +LABEL "release"="2023" LABEL "summary"="OpenVINO(TM) Model Server" LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models" ARG INSTALL_RPMS_FROM_URL= ENV INSTALL_RPMS_FROM_URL=$INSTALL_RPMS_FROM_URL -ARG INSTALL_DRIVER_VERSION="22.28.23726" +ARG INSTALL_DRIVER_VERSION="22.43.24595" ENV INSTALL_DRIVER_VERSION=$INSTALL_DRIVER_VERSION +ARG NVIDIA=0 ARG GPU=1 -ENV GPU=$GPU WORKDIR / SHELL ["/bin/bash", "-o", "pipefail", "-c"] # hadolint ignore=DL3003,DL3041,SC2164 -RUN microdnf upgrade -y ; \ - microdnf install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \ +RUN if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \ + $DNF_TOOL upgrade --setopt=install_weak_deps=0 -y ; \ + $DNF_TOOL install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \ if [ "$GPU" == "1" ] ; then \ case $INSTALL_DRIVER_VERSION in \ - "20.35.17767") \ - mkdir /tmp/gpu_deps ; \ - curl -L --output /tmp/gpu_deps/intel-opencl-20.35.17767-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-opencl-20.35.17767-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/level-zero-1.0.0-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/level-zero-1.0.0-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/level-zero-devel-1.0.0-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/level-zero-devel-1.0.0-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/intel-igc-opencl-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-opencl-1.0.4756-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/intel-igc-opencl-devel-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-opencl-devel-1.0.4756-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/intel-igc-core-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-core-1.0.4756-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/intel-gmmlib-20.2.4-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-gmmlib-20.2.4-1.el7.x86_64.rpm/download ; \ - curl -L --output /tmp/gpu_deps/intel-gmmlib-devel-20.2.4-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-gmmlib-devel-20.2.4-1.el7.x86_64.rpm/download ; \ - cd 
/tmp/gpu_deps && rpm -iv *.rpm && rm -Rf /tmp/gpu_deps ; \ - ;; \ "21.38.21026") \ mkdir /tmp/gpu_deps ; \ curl -L --output /tmp/gpu_deps/intel-igc-core-1.0.8708-1.el8.x86_64.rpm https://download.copr.fedorainfracloud.org/results/jdanecki/intel-opencl/centos-stream-8-x86_64/02870435-intel-igc/intel-igc-core-1.0.8708-1.el8.x86_64.rpm ; \ @@ -73,7 +62,7 @@ RUN microdnf upgrade -y ; \ cd /tmp/gpu_deps && rpm -iv *.rpm && rm -Rf /tmp/gpu_deps ; \ ;; \ "22.10.22597") \ - microdnf install -y libedit ; \ + $DNF_TOOL install -y libedit ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-gmmlib-22.0.3-i699.3.el8.x86_64.rpm ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-core-1.0.10409-i699.3.el8.x86_64.rpm ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/level-zero-1.7.9-i699.3.el8.x86_64.rpm ; \ @@ -82,7 +71,7 @@ RUN microdnf upgrade -y ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-opencl-22.10.22597-i699.3.el8.x86_64.rpm ; \ ;; \ "22.28.23726") \ - microdnf install -y libedit ; \ + $DNF_TOOL install -y libedit ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-gmmlib-22.1.7-i419.el8.x86_64.rpm ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-core-1.0.11485-i419.el8.x86_64.rpm ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-opencl-1.0.11485-i419.el8.x86_64.rpm ; \ @@ -90,6 +79,15 @@ RUN microdnf upgrade -y ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5-devel/intel-level-zero-gpu-1.3.23453-i392.el8.x86_64.rpm ; \ rpm -ivh https://repositories.intel.com/graphics/rhel/8.5-devel/level-zero-1.8.1-i392.el8.x86_64.rpm ; \ ;; \ + "22.43.24595") \ + $DNF_TOOL install -y libedit ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-gmmlib-22.3.1-i529.el8.x86_64.rpm ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-igc-core-1.0.12504.6-i537.el8.x86_64.rpm ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-igc-opencl-1.0.12504.6-i537.el8.x86_64.rpm ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-opencl-22.43.24595.35-i538.el8.x86_64.rpm ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-level-zero-gpu-1.3.24595.35-i538.el8.x86_64.rpm ; \ + rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/level-zero-1.8.8-i524.el8.x86_64.rpm ; \ + ;; \ *) \ echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}." 
; \ exit 1 ; \ @@ -102,7 +100,7 @@ RUN microdnf upgrade -y ; \ rpm -ivh http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/numactl-2.0.12-11.el8.x86_64.rpm; \ rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm; \ else \ - microdnf install -y tar gzip; \ + $DNF_TOOL install -y tar gzip; \ mkdir /tmp_ovms ; \ cd /tmp_ovms ; \ curl -L --fail -o deps.tar.xz "$INSTALL_RPMS_FROM_URL" ; \ @@ -112,11 +110,18 @@ RUN microdnf upgrade -y ; \ cd / ; \ rm -rf /tmp_ovms ; \ fi ; \ - microdnf install -y shadow-utils; \ + $DNF_TOOL install -y shadow-utils; \ cp -v /etc/ssl/certs/ca-bundle.crt /etc/ssl/certs/ca-certificates.crt ; \ groupadd --gid 5000 ovms && groupadd --gid 44 video1 && \ useradd --home-dir /home/ovms --create-home --uid 5000 --gid 5000 --groups 39,44 --shell /bin/bash --skel /dev/null ovms +# for NVIDIA +RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; echo "installing cuda yum package"; \ + dnf install -y \ + libcudnn8-8.6.0.163-1.cuda11.8 \ + libcutensor1-1.6.1.5-1 && \ + dnf clean all + COPY --from=base_build /ovms /ovms COPY --from=base_build /licenses /licenses diff --git a/release_files/Dockerfile.ubuntu b/release_files/Dockerfile.ubuntu index bf1c432403..63d7520cc6 100644 --- a/release_files/Dockerfile.ubuntu +++ b/release_files/Dockerfile.ubuntu @@ -73,11 +73,10 @@ FROM $BASE_IMAGE as release ARG INSTALL_RPMS_FROM_URL= ENV INSTALL_RPMS_FROM_URL=$INSTALL_RPMS_FROM_URL ENV HDDL_INSTALL_DIR=/ovms/lib/hddl -ARG INSTALL_DRIVER_VERSION="22.35.24055" +ARG INSTALL_DRIVER_VERSION="23.13.26032" ENV INSTALL_DRIVER_VERSION=$INSTALL_DRIVER_VERSION ARG GPU=1 ARG NVIDIA=0 -ENV GPU=$GPU ENV DEBIAN_FRONTEND=noninteractive SHELL ["/bin/bash", "-c"] WORKDIR / @@ -90,16 +89,6 @@ RUN apt-get update -y ; \ if [ "$GPU" == "1" ] ; then \ apt-get update && apt-get install -y libnuma1 ocl-icd-libopencl1 --no-install-recommends && rm -rf /var/lib/apt/lists/* && \ case $INSTALL_DRIVER_VERSION in \ - "20.35.17767") \ - mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-gmmlib_20.2.4_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-igc-core_1.0.4756_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-igc-opencl_1.0.4756_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-opencl_20.35.17767_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-ocloc_20.35.17767_amd64.deb ; \ - curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-level-zero-gpu_1.0.17767_amd64.deb ; \ - dpkg -i intel*.deb && rm -Rf /tmp/gpu_deps ; \ - ;; \ "21.48.21782") \ mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \ curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-gmmlib_21.3.3_amd64.deb ; \ @@ -131,6 +120,28 @@ RUN apt-get update -y ; \ apt-get clean ; \ rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* ; \ ;; \ + "22.43.24595") \ + apt-get update && apt-get install -y --no-install-recommends gpg gpg-agent && \ + curl https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee 
/etc/apt/sources.list.d/intel.gpu.focal.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + intel-opencl-icd=22.43.24595.35+i538~20.04 \ + intel-level-zero-gpu=1.3.24595.35+i538~20.04 \ + level-zero=1.8.8+i524~u20.04 && \ + apt-get purge gpg gpg-agent --yes && apt-get --yes autoremove && \ + apt-get clean ; \ + rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* ; \ + ;; \ + "23.13.26032") \ + mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/23.05.25593.11/libigdgmm12_22.3.0_amd64.deb ; \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.13700.14/intel-igc-core_1.0.13700.14_amd64.deb ; \ + curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.13700.14/intel-igc-opencl_1.0.13700.14_amd64.deb ; \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/23.13.26032.30/intel-opencl-icd_23.13.26032.30_amd64.deb ; \ + curl -L -O https://github.com/intel/compute-runtime/releases/download/23.13.26032.30/libigdgmm12_22.3.0_amd64.deb ; \ + dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \ + ;; \ *) \ dpkg -P intel-gmmlib intel-igc-core intel-igc-opencl intel-level-zero-gpu intel-ocloc intel-opencl intel-opencl-icd && \ apt-get update && apt-get -y --no-install-recommends install dpkg-dev && rm -rf /var/lib/apt/lists/* && \ diff --git a/release_files/thirdparty-licenses/blingfire.LICENSE.txt b/release_files/thirdparty-licenses/blingfire.LICENSE.txt new file mode 100644 index 0000000000..4b1ad51b2f --- /dev/null +++ b/release_files/thirdparty-licenses/blingfire.LICENSE.txt @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/security.md b/security.md new file mode 100644 index 0000000000..d85d4358b1 --- /dev/null +++ b/security.md @@ -0,0 +1,5 @@ +# Security Policy +Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. + +## Reporting a Vulnerability +Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 
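# Minimal illustrative sketch of the DNF_TOOL detection idiom used in the Dockerfile.redhat
# changes above, so the same RUN layer works on both ubi (dnf) and ubi-minimal (microdnf)
# base images; the package names below are placeholders, assuming a UBI 8 base.
# RUN set -e ; \
#     if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \
#     $DNF_TOOL install -y shadow-utils tar gzip ; \
#     $DNF_TOOL clean all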
diff --git a/src/BUILD b/src/BUILD index 6adee8bb7d..b373b570bd 100644 --- a/src/BUILD +++ b/src/BUILD @@ -21,6 +21,23 @@ # ], #) +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@mediapipe//mediapipe/framework:more_selects.bzl", "more_selects") + +#To build without mediapipe use flags - bazel build --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 //src:ovms +config_setting( + name = "disable_mediapipe", + define_values = { + "MEDIAPIPE_DISABLE": "1", + }, + visibility = ["//visibility:public"], +) + +more_selects.config_setting_negation( + name = "not_disable_mediapipe", + negate = ":disable_mediapipe", +) + constraint_setting(name = "linux_distribution_family") constraint_value(constraint_setting = "linux_distribution_family", name = "fedora") # like RHEL/CentOS constraint_value(constraint_setting = "linux_distribution_family", name = "debian") # like Ubuntu @@ -43,16 +60,16 @@ cc_library( ) load("@com_google_protobuf//:protobuf.bzl", "cc_proto_library") +load("@tensorflow_serving//tensorflow_serving:serving.bzl", "serving_tensorflow_proto_dep") cc_shared_library( name = "ovms_shared", dynamic_deps = [], static_deps = ["@tensorflow_serving//:__subpackages__", "@minitrace//:__subpackages__", - "@rapidjson//:__subpackages__", - "@spdlog//:__subpackages__", + "@com_github_gabime_spdlog//:__subpackages__", "@fmtlib//:__subpackages__", - "@cxxopts//:__subpackages__", + "@com_github_jarro2783_cxxopts//:__subpackages__", "@awssdk//:__subpackages__", "@boost//:__subpackages__", "@com_github_googleapis_google_cloud_cpp//:__subpackages__", @@ -74,17 +91,63 @@ cc_shared_library( "@double_conversion//:__subpackages__", "@nsync//:__subpackages__", "@eigen_archive//:__subpackages__", - "@openvino//:__subpackages__", + "@linux_openvino//:__subpackages__", "@com_github_jupp0r_prometheus_cpp//:__subpackages__", "@//:__subpackages__", "@com_github_grpc_grpc//:__subpackages__", "@upb//:__subpackages__", "@zlib//:__subpackages__", - ], + "@oneTBB//:__subpackages__", + "@com_github_glog_glog//:__subpackages__", + "@com_github_gflags_gflags//:__subpackages__", + ] + select({ + "//conditions:default": [ + "@mediapipe//:__subpackages__", + "@mediapipe_calculators//:__subpackages__", + "@model_api//:__subpackages__", + "@arm_neon_2_x86_sse//:__subpackages__", + "@ruy//:__subpackages__", + "@cpuinfo//:__subpackages__", + "@clog//:__subpackages__", + "@gemmlowp//:__subpackages__", + "@flatbuffers//:__subpackages__", + ], + "//src:disable_mediapipe" : [], + }), features = [], roots = ["//src:ovms_lib"], ) +load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library") + +mediapipe_proto_library( + name = "ovmscalculatoroptions_proto", # ovmscalculatoroptions_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + srcs = ["mediapipe_calculators/ovmscalculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "@mediapipe//mediapipe/framework:calculator_options_proto", + "@mediapipe//mediapipe/framework:calculator_proto", + ], +) +mediapipe_proto_library( + name = "modelapiovmsinferencecalculator_proto", # ovmscalculatoroptions_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + srcs = ["mediapipe_calculators/modelapiovmsinferencecalculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "@mediapipe//mediapipe/framework:calculator_options_proto", + "@mediapipe//mediapipe/framework:calculator_proto", + ], +) +mediapipe_proto_library( + name = 
"modelapiovmssessioncalculator_proto", # ovmscalculatoroptions_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + srcs = ["mediapipe_calculators/modelapiovmssessioncalculator.proto"], + visibility = ["//visibility:public"], + deps = [ + "@mediapipe//mediapipe/framework:calculator_options_proto", + "@mediapipe//mediapipe/framework:calculator_proto", + ], +) + cc_library( name = "ovms_lib", hdrs = ["ovms.h"], @@ -135,6 +198,7 @@ cc_library( "dags/exit_node.hpp", "dags/exitnodesession.cpp", "dags/exitnodesession.hpp", + "filesystem.cpp", "filesystem.hpp", "get_model_metadata_impl.cpp", "get_model_metadata_impl.hpp", @@ -260,6 +324,8 @@ cc_library( "schema.cpp", "serialization.cpp", "serialization.hpp", + "servablemetadata.cpp", + "servablemetadata.hpp", "servablemanagermodule.cpp", "servablemanagermodule.hpp", "server_settings.hpp", @@ -281,8 +347,6 @@ cc_library( "stringutils.hpp", "systeminfo.cpp", "systeminfo.hpp", - "systeminfo_impl.cpp", - "systeminfo_impl.hpp", "queue.hpp", "tensorinfo.cpp", "tensorinfo.hpp", @@ -295,34 +359,39 @@ cc_library( "version.hpp", "logging.hpp", "logging.cpp", - "binaryutils.hpp", - "binaryutils.cpp", - ], + "tensor_conversion.hpp", + "tensor_conversion.cpp", + ] + select({ + "//conditions:default": [ + "mediapipe_internal/mediapipefactory.cpp", + "mediapipe_internal/mediapipefactory.hpp", + "mediapipe_internal/mediapipegraphconfig.hpp", + "mediapipe_internal/mediapipegraphconfig.cpp", + "mediapipe_internal/mediapipegraphdefinition.cpp", + "mediapipe_internal/mediapipegraphdefinition.hpp", + "mediapipe_internal/mediapipegraphexecutor.cpp", + "mediapipe_internal/mediapipegraphexecutor.hpp", + "mediapipe_calculators/modelapiovmsadapter.cc", + "mediapipe_calculators/modelapiovmsadapter.hpp", + "mediapipe_calculators/modelapiovmsadapterwrapper.cc", + "mediapipe_calculators/modelapiovmsadapterwrapper.hpp", + "mediapipe_calculators/ovms_calculator.cc", + "mediapipe_calculators/modelapiovmsinferencecalculator.cc", + "mediapipe_calculators/modelapiovmssessioncalculator.cc", + ], + "//src:disable_mediapipe" : [], + }), deps = [ - "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto", - "@tensorflow_serving//tensorflow_serving/apis:model_service_cc_proto", - "@minitrace//:trace", - "@com_github_grpc_grpc//:grpc++", - "@org_tensorflow//tensorflow/core:framework", - "@rapidjson//:rapidjson", - "@spdlog//:spdlog", - "@cxxopts//:cxxopts", - "@awssdk//:s3", - "@awssdk//:core", - "@awssdk//:deps", - "@azure//:storage", - "@cpprest//:sdk", - "@boost//:lib", - "@com_github_googleapis_google_cloud_cpp//google/cloud/storage:storage_client", - "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server", - "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server_api", - "@tensorflow_serving//tensorflow_serving/util:threadpool_executor", - "@tensorflow_serving//tensorflow_serving/util:json_tensor", - "@openvino//:openvino", - "@opencv//:opencv", - "@com_github_jupp0r_prometheus_cpp//core", + "//:ovms_dependencies", "//src/kfserving_api:kfserving_api_cpp", - ], + ] + select({ + "//conditions:default": [ + "//src:ovmscalculatoroptions_cc_proto", # ovmscalculatoroptions_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + "//src:modelapiovmsinferencecalculator_cc_proto", # modelapiovmsinferencecalculator_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + "//src:modelapiovmssessioncalculator_cc_proto", # 
modelapiovmssessioncalculator_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target + ], + "//src:disable_mediapipe" : [], + }), local_defines = [ "SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE", ], @@ -339,7 +408,8 @@ cc_library( "-luuid", "-lstdc++fs", "-lcrypto", - ] + ], + alwayslink = 1, ) cc_binary( @@ -350,7 +420,7 @@ cc_binary( ], linkshared = 1, deps = [ - "@rapidjson//:rapidjson", + "@com_github_tencent_rapidjson//:rapidjson", ], ) @@ -426,7 +496,7 @@ cc_binary( "custom_node_interface.h", ], deps = [ - "@opencv//:opencv" + "@linux_opencv//:opencv" ], linkshared = 1, copts = [ @@ -445,7 +515,7 @@ cc_binary( "custom_node_interface.h", ], deps = [ - "@opencv//:opencv" + "@linux_opencv//:opencv" ], linkshared = 1, copts = [ @@ -490,7 +560,7 @@ cc_binary( "custom_node_interface.h", ], deps = [ - "@opencv//:opencv" + "@linux_opencv//:opencv" ], linkshared = 1, copts = [ @@ -509,7 +579,7 @@ cc_binary( "custom_node_interface.h", ], deps = [ - "@opencv//:opencv" + "@linux_opencv//:opencv" ], linkshared = 1, copts = [ @@ -528,7 +598,7 @@ cc_binary( "custom_node_interface.h", ], deps = [ - "@opencv//:opencv" + "@linux_opencv//:opencv" ], linkshared = 1, copts = [ @@ -577,8 +647,6 @@ cc_binary( name = "capi_benchmark", srcs = [ "main_benchmark.cpp", - "stringutils.cpp", - "stringutils.hpp", ], linkopts = [ "-lpthread", @@ -608,6 +676,7 @@ cc_binary( # "-lovms_shared", # Use for dynamic linking when neccessary ], copts = [ + "-Wall", "-Wconversion", "-Werror", ], @@ -623,7 +692,8 @@ cc_test( linkstatic = 1, srcs = [ "test/azurefilesystem_test.cpp", - "test/binaryutils_test.cpp", + "test/tensor_conversion_test.cpp", + "test/c_api_test_utils.hpp", "test/c_api_tests.cpp", "test/custom_loader_test.cpp", "test/custom_node_output_allocator_test.cpp", @@ -693,16 +763,65 @@ cc_test( "test/test_utils.hpp", "test/threadsafequeue_test.cpp", "test/unit_tests.cpp", - ], + ] + select({ + "//conditions:default": [ + "test/mediapipe/inputsidepacketusertestcalc.cc", + "test/mediapipeflow_test.cpp", + "test/mediapipe_validation_test.cpp", + ], + "//src:disable_mediapipe" : [], + }), data = [ + "test/add_two_inputs_model/1/add.xml", + "test/add_two_inputs_model/1/add.bin", + "test/binaryutils/grayscale.jpg", + "test/binaryutils/rgb.jpg", + "test/binaryutils/rgb2x2.jpg", + "test/binaryutils/rgb4x4.jpg", + "test/c_api/config.json", + "test/c_api/config_benchmark.json", + "test/c_api/config_dummy_dag.json", + "test/c_api/config_dummy_dynamic_entry_dag.json", + "test/c_api/config_metadata_all.json", + "test/c_api/config_standard_dummy.json", + "test/configs/emptyConfigWithMetrics.json", "test/dummy/1/dummy.xml", "test/dummy/1/dummy.bin", "test/dummy_fp64/1/saved_model.xml", "test/dummy_fp64/1/saved_model.bin", - "test/add_two_inputs_model/1/add.xml", - "test/add_two_inputs_model/1/add.bin", + "test/dummy_saved_model/1/saved_model.pb", + "test/dummyUppercase/1/dummy.xml", + "test/dummyUppercase/1/dummy.bin", "test/increment_1x3x4x5/1/increment_1x3x4x5.xml", "test/increment_1x3x4x5/1/increment_1x3x4x5.bin", + "test/mediapipe/subconfig.json", + "test/mediapipe/config_mediapipe_add_adapter_full.json", + "test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json", + "test/mediapipe/config_mediapipe_all_graphs_adapter_full.json", + "test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json", + "test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json", + "test/mediapipe/config_mediapipe_dummy_adapter_full.json", + 
"test/mediapipe/config_mediapipe_graph_with_side_packets.json", + "test/mediapipe/config_standard_add.json", + "test/mediapipe/config_standard_dummy.json", + "test/mediapipe/graphadd.pbtxt", + "test/mediapipe/graphaddadapterfull.pbtxt", + "test/mediapipe/graphdummy.pbtxt", + "test/mediapipe/graphdummyadapterfull.pbtxt", + "test/mediapipe/graphWithParams.pbtxt", + "test/mediapipe/relative_paths/config_relative_dummy_negative.json", + "test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json", + "test/mediapipe/relative_paths/config_relative_add_subconfig.json", + "test/mediapipe/relative_paths/config_relative_dummy.json", + "test/mediapipe/relative_paths/graph1/dummy1/1/dummy.xml", + "test/mediapipe/relative_paths/graph2/dummy2/1/dummy.xml", + "test/mediapipe/relative_paths/graph1/graphaddadapterfull.pbtxt", + "test/mediapipe/relative_paths/graph1/subconfig.json", + "test/mediapipe/relative_paths/graph2/graphadd.pbtxt", + "test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt", + "test/mediapipe/relative_paths/graph2/subconfig.json", + "test/passthrough/1/passthrough.xml", + "test/passthrough/1/passthrough.bin", "test/summator/1/summator.xml", "test/summator/1/summator.bin", ], diff --git a/src/azurefilesystem.cpp b/src/azurefilesystem.cpp index a16e4b4e10..d71ae9e938 100644 --- a/src/azurefilesystem.cpp +++ b/src/azurefilesystem.cpp @@ -24,9 +24,6 @@ namespace ovms { -const std::string AzureFileSystem::AZURE_URL_FILE_PREFIX = "azfs://"; -const std::string AzureFileSystem::AZURE_URL_BLOB_PREFIX = "az://"; - static as::cloud_storage_account createDefaultOrAnonymousAccount() { try { const char* env_cred = std::getenv("AZURE_STORAGE_CONNECTION_STRING"); diff --git a/src/azurefilesystem.hpp b/src/azurefilesystem.hpp index ed7dd1096e..3b0d26370f 100644 --- a/src/azurefilesystem.hpp +++ b/src/azurefilesystem.hpp @@ -131,10 +131,6 @@ class AzureFileSystem : public FileSystem { */ StatusCode deleteFileFolder(const std::string& path) override; - static const std::string AZURE_URL_FILE_PREFIX; - - static const std::string AZURE_URL_BLOB_PREFIX; - private: /** * diff --git a/src/azurestorage.cpp b/src/azurestorage.cpp index 2652fb31f3..b6eecd760d 100644 --- a/src/azurestorage.cpp +++ b/src/azurestorage.cpp @@ -37,29 +37,6 @@ const std::string AzureStorageAdapter::extractAzureStorageExceptionMessage(const } } -std::string AzureStorageAdapter::joinPath(std::initializer_list segments) { - std::string joined; - - for (const auto& seg : segments) { - if (joined.empty()) { - joined = seg; - } else if (isAbsolutePath(seg)) { - if (joined[joined.size() - 1] == '/') { - joined.append(seg.substr(1)); - } else { - joined.append(seg); - } - } else { - if (joined[joined.size() - 1] != '/') { - joined.append("/"); - } - joined.append(seg); - } - } - - return joined; -} - StatusCode AzureStorageAdapter::CreateLocalDir(const std::string& path) { int status = mkdir(const_cast(path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR); @@ -470,8 +447,8 @@ StatusCode AzureStorageBlob::downloadFileFolderTo(const std::string& local_path) } for (auto&& d : dirs) { - std::string remote_dir_path = joinPath({fullUri_, d}); - std::string local_dir_path = joinPath({local_path, d}); + std::string remote_dir_path = FileSystem::joinPath({fullUri_, d}); + std::string local_dir_path = FileSystem::joinPath({local_path, d}); SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing directory {} from {} -> {}", d, remote_dir_path, local_dir_path); @@ -498,8 +475,8 @@ StatusCode AzureStorageBlob::downloadFileFolderTo(const 
std::string& local_path) } for (auto&& f : files) { - std::string remote_file_path = joinPath({fullUri_, f}); - std::string local_file_path = joinPath({local_path, f}); + std::string remote_file_path = FileSystem::joinPath({fullUri_, f}); + std::string local_file_path = FileSystem::joinPath({local_path, f}); SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing file {} from {} -> {}", f, remote_file_path, local_file_path); @@ -555,9 +532,9 @@ StatusCode AzureStorageBlob::parseFilePath(const std::string& path) { fullUri_ = path; int share_start = 0; // Blob path - if (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) { - share_start = path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) + AzureFileSystem::AZURE_URL_BLOB_PREFIX.size(); - } else if (path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) { + if (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) { + share_start = path.find(FileSystem::AZURE_URL_BLOB_PREFIX) + FileSystem::AZURE_URL_BLOB_PREFIX.size(); + } else if (path.find(FileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) { // File path SPDLOG_LOGGER_ERROR(azurestorage_logger, "Wrong object type - az:// prefix in path required, azure:// found:", path); return StatusCode::AS_INVALID_PATH; @@ -1064,8 +1041,8 @@ StatusCode AzureStorageFile::downloadFileFolderTo(const std::string& local_path) } for (auto&& d : dirs) { - std::string remote_dir_path = joinPath({fullUri_, d}); - std::string local_dir_path = joinPath({local_path, d}); + std::string remote_dir_path = FileSystem::joinPath({fullUri_, d}); + std::string local_dir_path = FileSystem::joinPath({local_path, d}); SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing directory {} from {} -> {}", d, remote_dir_path, local_dir_path); @@ -1092,8 +1069,8 @@ StatusCode AzureStorageFile::downloadFileFolderTo(const std::string& local_path) } for (auto&& f : files) { - std::string remote_file_path = joinPath({fullUri_, f}); - std::string local_file_path = joinPath({local_path, f}); + std::string remote_file_path = FileSystem::joinPath({fullUri_, f}); + std::string local_file_path = FileSystem::joinPath({local_path, f}); SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing file {} from {} -> {}", f, remote_file_path, local_file_path); @@ -1154,9 +1131,9 @@ StatusCode AzureStorageFile::parseFilePath(const std::string& path) { fullUri_ = path; int share_start = 0; // File or directory path - if (path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) { - share_start = path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) + AzureFileSystem::AZURE_URL_FILE_PREFIX.size(); - } else if (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) { + if (path.find(FileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) { + share_start = path.find(FileSystem::AZURE_URL_FILE_PREFIX) + FileSystem::AZURE_URL_FILE_PREFIX.size(); + } else if (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) { // Blob path SPDLOG_LOGGER_ERROR(azurestorage_logger, "Wrong object type. 
azfs:// prefix in path required, found az://:", path); return StatusCode::AS_INVALID_PATH; @@ -1212,7 +1189,7 @@ std::shared_ptr AzureStorageFactory::getNewAzureStorageObje } bool AzureStorageFactory::isBlobStoragePath(std::string path) { - return (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos); + return (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos); } } // namespace ovms diff --git a/src/azurestorage.hpp b/src/azurestorage.hpp index afff5dd988..1cc0f27b93 100644 --- a/src/azurestorage.hpp +++ b/src/azurestorage.hpp @@ -58,7 +58,6 @@ class AzureStorageAdapter { virtual StatusCode downloadFileFolderTo(const std::string& local_path) = 0; virtual StatusCode checkPath(const std::string& path) = 0; - std::string joinPath(std::initializer_list segments); StatusCode CreateLocalDir(const std::string& path); bool isAbsolutePath(const std::string& path); std::vector FindSubdirectories(std::string path); diff --git a/src/capi_frontend/capi.cpp b/src/capi_frontend/capi.cpp index 03c2c38cf8..6bb70e3ec8 100644 --- a/src/capi_frontend/capi.cpp +++ b/src/capi_frontend/capi.cpp @@ -14,11 +14,14 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include #include "../buffer.hpp" #include "../dags/pipeline.hpp" +#include "../dags/pipelinedefinition.hpp" +#include "../dags/pipelinedefinitionunloadguard.hpp" #include "../execution_context.hpp" #include "../inferenceparameter.hpp" #include "../inferencerequest.hpp" @@ -29,12 +32,15 @@ #include "../modelinstanceunloadguard.hpp" #include "../modelmanager.hpp" #include "../ovms.h" // NOLINT +#include "../prediction_service.hpp" #include "../profiler.hpp" #include "../servablemanagermodule.hpp" +#include "../servablemetadata.hpp" #include "../server.hpp" #include "../server_settings.hpp" #include "../status.hpp" #include "../timer.hpp" +#include "capi_utils.hpp" using ovms::Buffer; using ovms::ExecutionContext; @@ -44,6 +50,9 @@ using ovms::InferenceResponse; using ovms::InferenceTensor; using ovms::ModelInstanceUnloadGuard; using ovms::ModelManager; +using ovms::Pipeline; +using ovms::PipelineDefinition; +using ovms::PipelineDefinitionUnloadGuard; using ovms::ServableManagerModule; using ovms::Server; using ovms::Status; @@ -55,6 +64,16 @@ using std::chrono::microseconds; extern "C" { #endif +OVMS_Status* OVMS_ApiVersion(uint32_t* major, uint32_t* minor) { + if (major == nullptr) + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "major version")); + if (minor == nullptr) + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "minor version")); + *major = OVMS_API_VERSION_MAJOR; + *minor = OVMS_API_VERSION_MINOR; + return nullptr; +} + void OVMS_StatusDelete(OVMS_Status* status) { if (status == nullptr) return; @@ -64,9 +83,9 @@ void OVMS_StatusDelete(OVMS_Status* status) { OVMS_Status* OVMS_StatusGetCode(OVMS_Status* status, uint32_t* code) { if (status == nullptr) - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STATUS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "status")); if (code == nullptr) - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "code")); ovms::Status* sts = reinterpret_cast(status); *code = static_cast(sts->getCode()); return nullptr; @@ -75,9 +94,9 @@ OVMS_Status* OVMS_StatusGetCode(OVMS_Status* status, OVMS_Status* OVMS_StatusGetDetails(OVMS_Status* status, 
const char** details) { if (status == nullptr) - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STATUS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "status")); if (details == nullptr) - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "details")); ovms::Status* sts = reinterpret_cast(status); *details = sts->string().c_str(); return nullptr; @@ -85,7 +104,7 @@ OVMS_Status* OVMS_StatusGetDetails(OVMS_Status* status, OVMS_Status* OVMS_ServerSettingsNew(OVMS_ServerSettings** settings) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "settings")); } *settings = reinterpret_cast(new ovms::ServerSettingsImpl); return nullptr; @@ -99,7 +118,7 @@ void OVMS_ServerSettingsDelete(OVMS_ServerSettings* settings) { OVMS_Status* OVMS_ModelsSettingsNew(OVMS_ModelsSettings** settings) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "model settings")); } *settings = reinterpret_cast(new ovms::ModelsSettingsImpl); return nullptr; @@ -114,7 +133,11 @@ void OVMS_ModelsSettingsDelete(OVMS_ModelsSettings* settings) { OVMS_Status* OVMS_ServerNew(OVMS_Server** server) { // Create new server once multi server configuration becomes possible. if (server == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server")); + } + // Hack to force spdlog singleton to initialize before ovms::Server singleton + if (spdlog::get("notUsedLogger")) { + return reinterpret_cast(new Status(StatusCode::INTERNAL_ERROR, "unexpected error during spdlog configuration")); } *server = reinterpret_cast(&ovms::Server::instance()); return nullptr; @@ -132,13 +155,13 @@ OVMS_Status* OVMS_ServerStartFromConfigurationFile(OVMS_Server* server, OVMS_ServerSettings* server_settings, OVMS_ModelsSettings* models_settings) { if (server == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server")); } if (server_settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (models_settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "model settings")); } ovms::Server* srv = reinterpret_cast(server); ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(server_settings); @@ -152,7 +175,7 @@ OVMS_Status* OVMS_ServerStartFromConfigurationFile(OVMS_Server* server, OVMS_Status* OVMS_ServerSettingsSetGrpcPort(OVMS_ServerSettings* settings, uint32_t grpcPort) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->grpcPort = grpcPort; @@ -162,7 +185,7 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcPort(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetRestPort(OVMS_ServerSettings* settings, uint32_t restPort) { if (settings == nullptr) { - return 
reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->restPort = restPort; @@ -172,7 +195,7 @@ OVMS_Status* OVMS_ServerSettingsSetRestPort(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetGrpcWorkers(OVMS_ServerSettings* settings, uint32_t grpc_workers) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->grpcWorkers = grpc_workers; @@ -182,10 +205,10 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcWorkers(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetGrpcBindAddress(OVMS_ServerSettings* settings, const char* grpc_bind_address) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (grpc_bind_address == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "grpc bind address")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->grpcBindAddress.assign(grpc_bind_address); @@ -195,7 +218,7 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcBindAddress(OVMS_ServerSettings* settings OVMS_Status* OVMS_ServerSettingsSetRestWorkers(OVMS_ServerSettings* settings, uint32_t rest_workers) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->restWorkers = rest_workers; @@ -205,10 +228,10 @@ OVMS_Status* OVMS_ServerSettingsSetRestWorkers(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetRestBindAddress(OVMS_ServerSettings* settings, const char* rest_bind_address) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (rest_bind_address == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "rest bind address")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->restBindAddress.assign(rest_bind_address); @@ -218,10 +241,10 @@ OVMS_Status* OVMS_ServerSettingsSetRestBindAddress(OVMS_ServerSettings* settings OVMS_Status* OVMS_ServerSettingsSetGrpcChannelArguments(OVMS_ServerSettings* settings, const char* grpc_channel_arguments) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (grpc_channel_arguments == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "grpc channel arguments")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->grpcChannelArguments.assign(grpc_channel_arguments); @@ -231,7 +254,7 @@ OVMS_Status* 
OVMS_ServerSettingsSetGrpcChannelArguments(OVMS_ServerSettings* set OVMS_Status* OVMS_ServerSettingsSetFileSystemPollWaitSeconds(OVMS_ServerSettings* settings, uint32_t seconds) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->filesystemPollWaitSeconds = seconds; @@ -241,7 +264,7 @@ OVMS_Status* OVMS_ServerSettingsSetFileSystemPollWaitSeconds(OVMS_ServerSettings OVMS_Status* OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(OVMS_ServerSettings* settings, uint32_t minutes) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->sequenceCleanerPollWaitMinutes = minutes; @@ -251,7 +274,7 @@ OVMS_Status* OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(OVMS_ServerSet OVMS_Status* OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(OVMS_ServerSettings* settings, uint32_t seconds) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->resourcesCleanerPollWaitSeconds = seconds; @@ -261,10 +284,10 @@ OVMS_Status* OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(OVM OVMS_Status* OVMS_ServerSettingsSetCpuExtensionPath(OVMS_ServerSettings* settings, const char* cpu_extension_path) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (cpu_extension_path == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "cpu extension path")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->cpuExtensionLibraryPath.assign(cpu_extension_path); @@ -274,10 +297,10 @@ OVMS_Status* OVMS_ServerSettingsSetCpuExtensionPath(OVMS_ServerSettings* setting OVMS_Status* OVMS_ServerSettingsSetCacheDir(OVMS_ServerSettings* settings, const char* cache_dir) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (cache_dir == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "cache dir")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->cacheDir.assign(cache_dir); @@ -287,7 +310,7 @@ OVMS_Status* OVMS_ServerSettingsSetCacheDir(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetLogLevel(OVMS_ServerSettings* settings, OVMS_LogLevel log_level) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); switch (log_level) { @@ -315,10 +338,10 @@ OVMS_Status* 
OVMS_ServerSettingsSetLogLevel(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ServerSettingsSetLogPath(OVMS_ServerSettings* settings, const char* log_path) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings")); } if (log_path == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "log path")); } ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings); serverSettings->logPath.assign(log_path); @@ -328,25 +351,25 @@ OVMS_Status* OVMS_ServerSettingsSetLogPath(OVMS_ServerSettings* settings, OVMS_Status* OVMS_ModelsSettingsSetConfigPath(OVMS_ModelsSettings* settings, const char* config_path) { if (settings == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "models settings")); } if (config_path == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "config path")); } ovms::ModelsSettingsImpl* modelsSettings = reinterpret_cast(settings); modelsSettings->configPath.assign(config_path); return nullptr; } // inference API -OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, uint32_t servableVersion) { +OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, int64_t servableVersion) { if (request == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (server == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server")); } if (servableName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable name")); } *request = reinterpret_cast(new InferenceRequest(servableName, servableVersion)); return nullptr; @@ -358,51 +381,74 @@ void OVMS_InferenceRequestDelete(OVMS_InferenceRequest* request) { delete reinterpret_cast(request); } -OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* req, const char* inputName, OVMS_DataType datatype, const uint64_t* shape, uint32_t dimCount) { +OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* req, const char* inputName, OVMS_DataType datatype, const int64_t* shape, size_t dimCount) { if (req == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (inputName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name")); } if (shape == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TABLE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->addInput(inputName, datatype, shape, dimCount); if (!status.ok()) { return reinterpret_cast(new Status(status)); } + if (spdlog::default_logger_raw()->level() == 
spdlog::level::trace) { + std::stringstream ss; + ss << "C-API adding request input for servable: " << request->getServableName() + << " version: " << request->getServableVersion() + << " name: " << inputName + << " datatype: " << toString(ovms::getOVMSDataTypeAsPrecision(datatype)) + << " shape: ["; + size_t i = 0; + for (i = 0; i < dimCount - 1; ++i) { + ss << shape[i] << ", "; + } + ss << shape[i] << "]"; + SPDLOG_TRACE(ss.str()); + } return nullptr; } OVMS_Status* OVMS_InferenceRequestInputSetData(OVMS_InferenceRequest* req, const char* inputName, const void* data, size_t bufferSize, OVMS_BufferType bufferType, uint32_t deviceId) { if (req == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (inputName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name")); } if (data == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->setInputBuffer(inputName, data, bufferSize, bufferType, deviceId); if (!status.ok()) { return reinterpret_cast(new Status(status)); } + if (spdlog::default_logger_raw()->level() == spdlog::level::trace) { + std::stringstream ss; + ss << "C-API setting request input data for servable: " << request->getServableName() + << " version: " << request->getServableVersion() + << " name: " << inputName + << " bufferType: " << bufferType + << " deviceId: " << deviceId; + SPDLOG_TRACE(ss.str()); + } return nullptr; } OVMS_Status* OVMS_InferenceRequestAddParameter(OVMS_InferenceRequest* req, const char* parameterName, OVMS_DataType datatype, const void* data, size_t byteSize) { if (req == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (parameterName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "parameter name")); } if (data == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->addParameter(parameterName, datatype, data); @@ -414,10 +460,10 @@ OVMS_Status* OVMS_InferenceRequestAddParameter(OVMS_InferenceRequest* req, const OVMS_Status* OVMS_InferenceRequestRemoveParameter(OVMS_InferenceRequest* req, const char* parameterName) { if (req == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (parameterName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "parameter name")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->removeParameter(parameterName); @@ -429,10 +475,10 @@ OVMS_Status* OVMS_InferenceRequestRemoveParameter(OVMS_InferenceRequest* req, co OVMS_Status* OVMS_InferenceRequestRemoveInput(OVMS_InferenceRequest* req, const char* inputName) { if (req == nullptr) { - return reinterpret_cast(new 
Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (inputName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->removeInput(inputName); @@ -444,10 +490,10 @@ OVMS_Status* OVMS_InferenceRequestRemoveInput(OVMS_InferenceRequest* req, const OVMS_Status* OVMS_InferenceRequestInputRemoveData(OVMS_InferenceRequest* req, const char* inputName) { if (req == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (inputName == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name")); } InferenceRequest* request = reinterpret_cast(req); auto status = request->removeInputBuffer(inputName); @@ -457,33 +503,33 @@ OVMS_Status* OVMS_InferenceRequestInputRemoveData(OVMS_InferenceRequest* req, co return nullptr; } -OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* res, uint32_t id, const char** name, OVMS_DataType* datatype, const uint64_t** shape, uint32_t* dimCount, const void** data, size_t* bytesize, OVMS_BufferType* bufferType, uint32_t* deviceId) { +OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* res, uint32_t id, const char** name, OVMS_DataType* datatype, const int64_t** shape, size_t* dimCount, const void** data, size_t* bytesize, OVMS_BufferType* bufferType, uint32_t* deviceId) { if (res == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_RESPONSE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response")); } if (name == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "output name")); } if (datatype == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data type")); } if (shape == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TABLE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape")); } if (dimCount == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "dimension count")); } if (data == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data")); } if (bytesize == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "byte size")); } if (bufferType == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "buffer type")); } if (deviceId == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "device id")); } InferenceResponse* response = reinterpret_cast(res); const InferenceTensor* tensor = nullptr; @@ -509,15 +555,32 @@ OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* res, 
uint32 // possibly it is not neccessary to discriminate *data = buffer->data(); *bytesize = buffer->getByteSize(); + if (spdlog::default_logger_raw()->level() == spdlog::level::trace) { + std::stringstream ss; + ss << "C-API getting response output of servable: " << response->getServableName() + << " version: " << response->getServableVersion() + << " output id: " << id + << " name: " << *cppName + << " datatype: " << toString(ovms::getOVMSDataTypeAsPrecision(*datatype)) + << " shape: ["; + size_t i = 0; + for (i = 0; i < *dimCount - 1; ++i) { + ss << (*shape)[i] << ", "; + } + ss << (*shape)[i] << "]" + << " bufferType: " << *bufferType + << " deviceId: " << *deviceId; + SPDLOG_TRACE(ss.str()); + } return nullptr; } OVMS_Status* OVMS_InferenceResponseGetOutputCount(OVMS_InferenceResponse* res, uint32_t* count) { if (res == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_RESPONSE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response")); } if (count == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "output count")); } InferenceResponse* response = reinterpret_cast(res); *count = response->getOutputCount(); @@ -526,10 +589,10 @@ OVMS_Status* OVMS_InferenceResponseGetOutputCount(OVMS_InferenceResponse* res, u OVMS_Status* OVMS_InferenceResponseGetParameterCount(OVMS_InferenceResponse* res, uint32_t* count) { if (res == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_RESPONSE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response")); } if (count == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "parameter count")); } InferenceResponse* response = reinterpret_cast(res); *count = response->getParameterCount(); @@ -538,18 +601,18 @@ OVMS_Status* OVMS_InferenceResponseGetParameterCount(OVMS_InferenceResponse* res OVMS_Status* OVMS_InferenceResponseGetParameter(OVMS_InferenceResponse* res, uint32_t id, OVMS_DataType* datatype, const void** data) { if (res == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_RESPONSE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response")); } if (datatype == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data type")); } if (data == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data")); } InferenceResponse* response = reinterpret_cast(res); const InferenceParameter* parameter = response->getParameter(id); if (nullptr == parameter) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PARAMETER_FOR_REMOVAL)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PARAMETER)); } *datatype = parameter->getDataType(); *data = parameter->getData(); @@ -569,58 +632,75 @@ enum : unsigned int { TIMER_END }; -static Status getModelInstance(ovms::Server& server, const InferenceRequest* request, std::shared_ptr& modelInstance, - std::unique_ptr& modelInstanceUnloadGuardPtr) { - OVMS_PROFILE_FUNCTION(); +static Status getModelManager(Server& server, ModelManager** modelManager) { if (!server.isLive()) { - return ovms::Status(ovms::StatusCode::SERVER_NOT_READY_FOR_INFERENCE, "not live"); + return 
ovms::Status(ovms::StatusCode::SERVER_NOT_READY, "not live"); } if (!server.isReady()) { - return ovms::Status(ovms::StatusCode::SERVER_NOT_READY_FOR_INFERENCE, "not ready"); + return ovms::Status(ovms::StatusCode::SERVER_NOT_READY, "not ready"); } const ovms::Module* servableModule = server.getModule(ovms::SERVABLE_MANAGER_MODULE_NAME); if (!servableModule) { return ovms::Status(ovms::StatusCode::INTERNAL_ERROR, "missing servable manager"); } - auto& modelManager = dynamic_cast(servableModule)->getServableManager(); - return modelManager.getModelInstance(request->getServableName(), request->getServableVersion(), modelInstance, modelInstanceUnloadGuardPtr); + *modelManager = &dynamic_cast(servableModule)->getServableManager(); + return StatusCode::OK; } -Status getPipeline(ovms::Server& server, const InferenceRequest* request, +static Status getModelInstance(ovms::Server& server, const std::string& modelName, int64_t modelVersion, std::shared_ptr& modelInstance, + std::unique_ptr& modelInstanceUnloadGuardPtr) { + OVMS_PROFILE_FUNCTION(); + ModelManager* modelManager{nullptr}; + auto status = getModelManager(server, &modelManager); + if (!status.ok()) { + return status; + } + return modelManager->getModelInstance(modelName, modelVersion, modelInstance, modelInstanceUnloadGuardPtr); +} + +static Status getPipeline(ovms::Server& server, const InferenceRequest* request, InferenceResponse* response, std::unique_ptr& pipelinePtr) { OVMS_PROFILE_FUNCTION(); - if (!server.isLive()) { - return ovms::Status(ovms::StatusCode::SERVER_NOT_READY_FOR_INFERENCE, "not live"); + ModelManager* modelManager{nullptr}; + auto status = getModelManager(server, &modelManager); + if (!status.ok()) { + return status; } - if (!server.isReady()) { - return ovms::Status(ovms::StatusCode::SERVER_NOT_READY_FOR_INFERENCE, "not ready"); + return modelManager->createPipeline(pipelinePtr, request->getServableName(), request, response); +} + +static Status getPipelineDefinition(Server& server, const std::string& servableName, PipelineDefinition** pipelineDefinition, std::unique_ptr& unloadGuard) { + ModelManager* modelManager{nullptr}; + Status status = getModelManager(server, &modelManager); + if (!status.ok()) { + return status; } - const ovms::Module* servableModule = server.getModule(ovms::SERVABLE_MANAGER_MODULE_NAME); - if (!servableModule) { - return ovms::Status(ovms::StatusCode::INTERNAL_ERROR, "missing servable manager"); + *pipelineDefinition = modelManager->getPipelineFactory().findDefinitionByName(servableName); + if (!*pipelineDefinition) { + return Status(StatusCode::PIPELINE_DEFINITION_NAME_MISSING); } - auto& modelManager = dynamic_cast(servableModule)->getServableManager(); - return modelManager.createPipeline(pipelinePtr, request->getServableName(), request, response); + return (*pipelineDefinition)->waitForLoaded(unloadGuard, 0); } + } // namespace + OVMS_Status* OVMS_Inference(OVMS_Server* serverPtr, OVMS_InferenceRequest* request, OVMS_InferenceResponse** response) { OVMS_PROFILE_FUNCTION(); using std::chrono::microseconds; Timer timer; timer.start(TOTAL); if (serverPtr == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server")); } if (request == nullptr) { - return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request")); } if (response == nullptr) { - return reinterpret_cast(new 
Status(StatusCode::NONEXISTENT_RESPONSE)); + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response")); } auto req = reinterpret_cast(request); ovms::Server& server = *reinterpret_cast(serverPtr); - std::unique_ptr res(new ovms::InferenceResponse(req->getServableName(), req->getServableVersion())); SPDLOG_DEBUG("Processing C-API request for model: {}; version: {}", req->getServableName(), @@ -630,8 +710,9 @@ OVMS_Status* OVMS_Inference(OVMS_Server* serverPtr, OVMS_InferenceRequest* reque std::unique_ptr pipelinePtr; std::unique_ptr modelInstanceUnloadGuard; - auto status = getModelInstance(server, req, modelInstance, modelInstanceUnloadGuard); + auto status = getModelInstance(server, req->getServableName(), req->getServableVersion(), modelInstance, modelInstanceUnloadGuard); + std::unique_ptr res(new ovms::InferenceResponse(req->getServableName(), req->getServableVersion())); if (status == StatusCode::MODEL_NAME_MISSING) { SPDLOG_DEBUG("Requested model: {} does not exist. Searching for pipeline with that name...", req->getServableName()); status = getPipeline(server, req, res.get(), pipelinePtr); @@ -640,7 +721,7 @@ OVMS_Status* OVMS_Inference(OVMS_Server* serverPtr, OVMS_InferenceRequest* reque if (modelInstance) { // INCREMENT_IF_ENABLED(modelInstance->getMetricReporter().reqFailGrpcPredict); } - SPDLOG_INFO("Getting modelInstance or pipeline failed. {}", status.string()); + SPDLOG_DEBUG("Getting modelInstance or pipeline failed. {}", status.string()); return reinterpret_cast(new Status(status)); } // fix execution context and metrics @@ -672,6 +753,151 @@ OVMS_Status* OVMS_Inference(OVMS_Server* serverPtr, OVMS_InferenceRequest* reque return nullptr; } +OVMS_Status* OVMS_GetServableMetadata(OVMS_Server* serverPtr, const char* servableName, int64_t servableVersion, OVMS_ServableMetadata** servableMetadata) { + if (serverPtr == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server")); + } + if (servableName == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable name")); + } + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + // TODO check inputs + // TODO metrics + std::unique_ptr modelInstanceUnloadGuard; + std::shared_ptr modelInstance; + std::unique_ptr pipelinePtr; + ovms::Server& server = *reinterpret_cast(serverPtr); + auto status = getModelInstance(server, servableName, servableVersion, modelInstance, modelInstanceUnloadGuard); + + if (status == StatusCode::MODEL_NAME_MISSING) { + SPDLOG_DEBUG("Requested model: {} does not exist. Searching for pipeline with that name...", servableName); + PipelineDefinition* pipelineDefinition = nullptr; + std::unique_ptr unloadGuard; + status = getPipelineDefinition(server, servableName, &pipelineDefinition, unloadGuard); + if (!status.ok() || !pipelineDefinition) { + return reinterpret_cast(new Status(StatusCode::MODEL_NAME_MISSING)); + } + *servableMetadata = reinterpret_cast(new ovms::ServableMetadata(servableName, servableVersion, pipelineDefinition->getInputsInfo(), pipelineDefinition->getOutputsInfo())); + return nullptr; + } + if (!status.ok()) { + if (modelInstance) { + // INCREMENT_IF_ENABLED(modelInstance->getMetricReporter().reqFailGrpcPredict); + } + SPDLOG_INFO("Getting modelInstance or pipeline failed. 
{}", status.string()); + return reinterpret_cast(new Status(status)); + } + *servableMetadata = reinterpret_cast(new ovms::ServableMetadata(servableName, servableVersion, modelInstance->getInputsInfo(), modelInstance->getOutputsInfo(), modelInstance->getRTInfo())); + return nullptr; +} + +OVMS_Status* OVMS_ServableMetadataGetInputCount(OVMS_ServableMetadata* servableMetadata, uint32_t* count) { + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + if (count == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input count")); + } + ovms::ServableMetadata* metadata = reinterpret_cast(servableMetadata); + *count = metadata->getInputsInfo().size(); + return nullptr; +} + +OVMS_Status* OVMS_ServableMetadataGetOutputCount(OVMS_ServableMetadata* servableMetadata, uint32_t* count) { + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + if (count == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "output count")); + } + ovms::ServableMetadata* metadata = reinterpret_cast(servableMetadata); + *count = metadata->getOutputsInfo().size(); + return nullptr; +} +OVMS_Status* OVMS_ServableMetadataGetInput(OVMS_ServableMetadata* servableMetadata, uint32_t id, const char** name, OVMS_DataType* datatype, size_t* dimCount, int64_t** shapeMin, int64_t** shapeMax) { + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + if (name == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name")); + } + if (datatype == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data type")); + } + if (dimCount == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "dimension count")); + } + if (shapeMin == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape min array")); + } + if (shapeMax == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape max array")); + } + ovms::ServableMetadata* metadata = reinterpret_cast(servableMetadata); + if (id >= metadata->getInputsInfo().size()) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TENSOR)); + } + auto it = std::next(metadata->getInputsInfo().begin(), id); + *name = it->second->getName().c_str(); + *datatype = getPrecisionAsOVMSDataType(it->second->getPrecision()); + *dimCount = metadata->getInputDimsMin().at(*name).size(); + *shapeMin = const_cast(metadata->getInputDimsMin().at(*name).data()); + *shapeMax = const_cast(metadata->getInputDimsMax().at(*name).data()); + return nullptr; +} + +OVMS_Status* OVMS_ServableMetadataGetOutput(OVMS_ServableMetadata* servableMetadata, uint32_t id, const char** name, OVMS_DataType* datatype, size_t* dimCount, int64_t** shapeMin, int64_t** shapeMax) { + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + if (name == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "output name")); + } + if (datatype == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data type")); + } + if (dimCount == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "dimension count")); + } + if (shapeMin == nullptr) { + return reinterpret_cast(new 
Status(StatusCode::NONEXISTENT_PTR, "shape min array")); + } + if (shapeMax == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape max array")); + } + ovms::ServableMetadata* metadata = reinterpret_cast(servableMetadata); + if (id >= metadata->getOutputsInfo().size()) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TENSOR)); + } + auto it = std::next(metadata->getOutputsInfo().begin(), id); + *name = it->second->getName().c_str(); + *datatype = getPrecisionAsOVMSDataType(it->second->getPrecision()); + *dimCount = metadata->getOutputDimsMin().at(*name).size(); + *shapeMin = const_cast(metadata->getOutputDimsMin().at(*name).data()); + *shapeMax = const_cast(metadata->getOutputDimsMax().at(*name).data()); + return nullptr; +} + +OVMS_Status* OVMS_ServableMetadataGetInfo(OVMS_ServableMetadata* servableMetadata, const void** info) { + if (servableMetadata == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable metadata")); + } + if (info == nullptr) { + return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "info")); + } + ovms::ServableMetadata* metadata = reinterpret_cast(servableMetadata); + *info = const_cast(reinterpret_cast(&(metadata->getInfo()))); + return nullptr; +} + +void OVMS_ServableMetadataDelete(OVMS_ServableMetadata* metadata) { + if (metadata == nullptr) + return; + delete reinterpret_cast(metadata); +} + #ifdef __cplusplus } #endif diff --git a/src/capi_frontend/capi_utils.cpp b/src/capi_frontend/capi_utils.cpp index 90d7e34497..b0873a8426 100644 --- a/src/capi_frontend/capi_utils.cpp +++ b/src/capi_frontend/capi_utils.cpp @@ -25,12 +25,11 @@ #include "../logging.hpp" #include "../shape.hpp" #include "../status.hpp" -#include "../tensorinfo.hpp" namespace ovms { -std::string tensorShapeToString(const shape_t& shape) { - return TensorInfo::shapeToString(shape); +std::string tensorShapeToString(const signed_shape_t& shape) { + return shapeToString(shape); } OVMS_DataType getPrecisionAsOVMSDataType(Precision precision) { @@ -145,7 +144,7 @@ Status prepareConsolidatedTensorImpl(InferenceResponse* response, const std::str Status status = response->addOutput( name, getPrecisionAsOVMSDataType(ovElementTypeToOvmsPrecision(precision)), - shape.data(), + reinterpret_cast(shape.data()), shape.size()); if (!status.ok()) { SPDLOG_LOGGER_ERROR(dag_executor_logger, "Failed to prepare consolidated tensor, servable: {}; tensor with name: {}", response->getServableName(), name); @@ -170,4 +169,7 @@ Status prepareConsolidatedTensorImpl(InferenceResponse* response, const std::str name, response->getServableName(), response->getServableVersion()); return StatusCode::INTERNAL_ERROR; } +bool requiresPreProcessing(const InferenceTensor& tensor) { + return false; +} } // namespace ovms diff --git a/src/capi_frontend/capi_utils.hpp b/src/capi_frontend/capi_utils.hpp index e061e1f61b..67eb087165 100644 --- a/src/capi_frontend/capi_utils.hpp +++ b/src/capi_frontend/capi_utils.hpp @@ -15,6 +15,7 @@ //***************************************************************************** #pragma once #include +#include #include "../ovms.h" // NOLINT #include "../precision.hpp" @@ -23,12 +24,15 @@ namespace ovms { class InferenceRequest; class InferenceResponse; +class InferenceTensor; class Status; -std::string tensorShapeToString(const shape_t& tensorShape); +std::string tensorShapeToString(const signed_shape_t& tensorShape); OVMS_DataType getPrecisionAsOVMSDataType(Precision precision); Precision 
getOVMSDataTypeAsPrecision(OVMS_DataType datatype); Status isNativeFileFormatUsed(const InferenceRequest& request, const std::string& name, bool& nativeFileFormatUsed); const std::string& getRequestServableName(const ovms::InferenceRequest& request); Status prepareConsolidatedTensorImpl(InferenceResponse* response, const std::string& name, ov::element::Type_t precision, const ov::Shape& shape, char*& bufferOut, size_t size); +bool requiresPreProcessing(const InferenceTensor& tensor); +std::string& createOrGetString(InferenceTensor& proto, int index); } // namespace ovms diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp index faa873fbb0..74e369e04b 100644 --- a/src/cli_parser.cpp +++ b/src/cli_parser.cpp @@ -142,7 +142,7 @@ void CLIParser::parse(int argc, char** argv) { cxxopts::value()->default_value("CPU"), "TARGET_DEVICE") ("plugin_config", - "A dictionary of plugin configuration keys and their values, eg \"{\\\"NUM_STREAMS\\\": \\\"1\\\"}\". Default throughput streams for CPU and GPU are calculated by OpenVINO", + "A dictionary of plugin configuration keys and their values, eg \"{\\\"NUM_STREAMS\\\": \\\"1\\\"}\". Default number of streams is optimized to optimal latency with low concurrency.", cxxopts::value(), "PLUGIN_CONFIG") ("stateful", diff --git a/src/custom_nodes/Dockerfile.redhat b/src/custom_nodes/Dockerfile.redhat index cd1d212ae1..58ded3ea85 100644 --- a/src/custom_nodes/Dockerfile.redhat +++ b/src/custom_nodes/Dockerfile.redhat @@ -23,7 +23,7 @@ COPY opencv_cmake_flags.txt /opt COPY install_opencv.sh /opt RUN ./install_opencv.sh -ARG OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv" +ARG OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security" ARG NODE_NAME=model_zoo_intel_object_detection ARG NODE_TYPE=cpp diff --git a/src/custom_nodes/Dockerfile.ubuntu b/src/custom_nodes/Dockerfile.ubuntu index 4f97e38478..171c71b95c 100644 --- a/src/custom_nodes/Dockerfile.ubuntu +++ b/src/custom_nodes/Dockerfile.ubuntu @@ -14,7 +14,8 @@ # limitations under the License. 
# -FROM ubuntu:20.04 +ARG BASE_IMAGE +FROM $BASE_IMAGE as base_build RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential curl @@ -24,7 +25,7 @@ COPY opencv_cmake_flags.txt /opt COPY install_opencv.sh /opt RUN ./install_opencv.sh -ARG OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv" +ARG OPS="-fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security" ARG NODE_NAME=model_zoo_intel_object_detection ARG NODE_TYPE=cpp diff --git a/src/custom_nodes/Makefile b/src/custom_nodes/Makefile index 9e5ca87d06..1ec06d6d7f 100644 --- a/src/custom_nodes/Makefile +++ b/src/custom_nodes/Makefile @@ -19,6 +19,7 @@ OPENCV_BUILD_FLAGS ?= ../../third_party/opencv/opencv_cmake_flags.txt OPENCV_INSTALL_SCRIPT ?= ../../third_party/opencv/install_opencv.sh BASE_OS ?= ubuntu +BASE_IMAGE ?= ubuntu:20.04 NODES ?= add_one east_ocr face_blur horizontal_ocr image_transformation model_zoo_intel_object_detection NODE_TYPE ?= cpp @@ -28,13 +29,24 @@ NODE_TYPE ?= cpp default: all all: +ifeq ($(NO_DOCKER_CACHE),true) + $(eval NO_CACHE_OPTION:=--no-cache) + @echo "Docker image will be rebuilt from scratch" +endif @cp $(HEADER_FILE_PATH) . @cp $(OPENCV_BUILD_FLAGS) . @cp $(OPENCV_INSTALL_SCRIPT) . @cp -r ../queue.hpp ./queue.hpp - for NODE_NAME in $(NODES); do \ - echo "Building $$NODE_NAME" ; \ - docker build -f Dockerfile.$(BASE_OS) -t custom_node_build_image:latest --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} --build-arg no_proxy=${no_proxy} --build-arg NODE_NAME=$$NODE_NAME --build-arg NODE_TYPE=$(NODE_TYPE) . || exit 1 ; \ +# Pass down --no-cache option to docker build for the first node, but not for the rest, the rest will re-build last layer anyway + first_iteration=true ; for NODE_NAME in $(NODES); do \ + if [ "$$first_iteration" = true ]; then \ + first_iteration=false ; \ + docker_cache_param=$(NO_CACHE_OPTION) ; \ + else \ + docker_cache_param= ; \ + fi ; \ + echo $$docker_cache_param ; \ + docker build $$docker_cache_param -f Dockerfile.$(BASE_OS) -t custom_node_build_image:latest --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} --build-arg no_proxy=${no_proxy} --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg NODE_NAME=$$NODE_NAME --build-arg NODE_TYPE=$(NODE_TYPE) . 
|| exit 1 ; \ mkdir -p ./lib/$(BASE_OS) ; \ docker cp $$(docker create --rm custom_node_build_image:latest):/custom_nodes/lib/libcustom_node_$$NODE_NAME.so ./lib/$(BASE_OS)/ || exit 1 ; \ echo "Built $$NODE_NAME" ; \ diff --git a/src/custom_nodes/add_one/add_one.cpp b/src/custom_nodes/add_one/add_one.cpp index 1799e10615..f1c1cde0cc 100644 --- a/src/custom_nodes/add_one/add_one.cpp +++ b/src/custom_nodes/add_one/add_one.cpp @@ -34,7 +34,7 @@ static constexpr const char* OUTPUT_TENSOR_DIMS_NAME = "output_dims"; static constexpr const char* OUTPUT_INFO_NAME = "output_info"; static constexpr const char* OUTPUT_INFO_DIMS_NAME = "output_info_dims"; -int initializeInternalManager(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { +static int initializeInternalManager(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { std::unique_ptr internalManager = std::make_unique(); NODE_ASSERT(internalManager != nullptr, "internalManager allocation failed"); @@ -63,7 +63,7 @@ int initializeInternalManager(void** customNodeLibraryInternalManager, const str return 0; } -int reinitializeInternalManagerIfNeccessary(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { +static int reinitializeInternalManagerIfNeccessary(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { InternalManager* internalManager = static_cast(*customNodeLibraryInternalManager); NODE_ASSERT(internalManager != nullptr, "internalManager is not initialized"); std::unique_lock lock(internalManager->getInternalManagerLock()); diff --git a/src/custom_nodes/add_one/add_one_internal_manager.hpp b/src/custom_nodes/add_one/add_one_internal_manager.hpp index 9fafef054c..6fb3407ee6 100644 --- a/src/custom_nodes/add_one/add_one_internal_manager.hpp +++ b/src/custom_nodes/add_one/add_one_internal_manager.hpp @@ -34,6 +34,7 @@ class AddOneInternalManager : public CustomNodeLibraryInternalManager { currentOutputQueueSize(currentOutputQueueSize), currentInfoQueueSize(currentInfoQueueSize) { } + ~AddOneInternalManager() {} int getOutputSize() { return outputSize; } diff --git a/src/custom_nodes/common/buffersqueue.cpp b/src/custom_nodes/common/buffersqueue.cpp index e1b08ca9a6..d338eac258 100644 --- a/src/custom_nodes/common/buffersqueue.cpp +++ b/src/custom_nodes/common/buffersqueue.cpp @@ -30,6 +30,8 @@ BuffersQueue::BuffersQueue(size_t singleBufferSize, int streamsLength) : } } +BuffersQueue::~BuffersQueue() {} + void* BuffersQueue::getBuffer() { // can be easily switched to async version if need arise auto idleId = tryToGetIdleStream(); diff --git a/src/custom_nodes/common/buffersqueue.hpp b/src/custom_nodes/common/buffersqueue.hpp index 4e69cb1aa8..8e31b316a5 100644 --- a/src/custom_nodes/common/buffersqueue.hpp +++ b/src/custom_nodes/common/buffersqueue.hpp @@ -36,6 +36,7 @@ class BuffersQueue : protected Queue { public: BuffersQueue(size_t singleBufferSize, int streamsLength); + ~BuffersQueue(); void* getBuffer(); bool returnBuffer(void* buffer); const size_t getSize(); diff --git a/src/custom_nodes/common/custom_node_library_internal_manager.cpp b/src/custom_nodes/common/custom_node_library_internal_manager.cpp index 13f11b2463..58a905810d 100644 --- a/src/custom_nodes/common/custom_node_library_internal_manager.cpp +++ b/src/custom_nodes/common/custom_node_library_internal_manager.cpp @@ -23,6 +23,12 @@ namespace ovms { namespace custom_nodes_common { 
+CustomNodeLibraryInternalManager::CustomNodeLibraryInternalManager() { +} + +CustomNodeLibraryInternalManager::~CustomNodeLibraryInternalManager() { +} + bool CustomNodeLibraryInternalManager::createBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength) { auto it = outputBuffers.find(name); if (it != outputBuffers.end()) { diff --git a/src/custom_nodes/common/custom_node_library_internal_manager.hpp b/src/custom_nodes/common/custom_node_library_internal_manager.hpp index 846d4da050..dc1b89b0d2 100644 --- a/src/custom_nodes/common/custom_node_library_internal_manager.hpp +++ b/src/custom_nodes/common/custom_node_library_internal_manager.hpp @@ -31,7 +31,8 @@ class CustomNodeLibraryInternalManager { std::shared_timed_mutex internalManagerLock; public: - CustomNodeLibraryInternalManager() = default; + CustomNodeLibraryInternalManager(); + ~CustomNodeLibraryInternalManager(); bool createBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength); bool recreateBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength); BuffersQueue* getBuffersQueue(const std::string& name); diff --git a/src/custom_nodes/east_ocr/east_ocr.cpp b/src/custom_nodes/east_ocr/east_ocr.cpp index 82b5ebc09e..c15deec2d6 100644 --- a/src/custom_nodes/east_ocr/east_ocr.cpp +++ b/src/custom_nodes/east_ocr/east_ocr.cpp @@ -36,7 +36,7 @@ struct BoxMetadata { float originalHeight; }; -bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const std::vector& metadata, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale, int rotationAngleThreshold) { +static bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const std::vector& metadata, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale, int rotationAngleThreshold) { const uint64_t outputBatch = boxes.size(); int channels = convertToGrayScale ? 1 : 3; @@ -86,7 +86,7 @@ bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector< return true; } -bool copy_boxes_into_output(struct CustomNodeTensor* output, const std::vector& boxes) { +static bool copy_boxes_into_output(struct CustomNodeTensor* output, const std::vector& boxes) { const uint64_t outputBatch = boxes.size(); uint64_t byteSize = sizeof(int32_t) * 4 * outputBatch; @@ -113,7 +113,7 @@ bool copy_boxes_into_output(struct CustomNodeTensor* output, const std::vector& confidences) { +static bool copy_confidences_into_output(struct CustomNodeTensor* output, const std::vector& confidences) { const uint64_t outputBatch = confidences.size(); uint64_t byteSize = sizeof(float) * outputBatch; diff --git a/src/custom_nodes/horizontal_ocr/README.md b/src/custom_nodes/horizontal_ocr/README.md index b128f7eba5..b2107cbc0b 100644 --- a/src/custom_nodes/horizontal_ocr/README.md +++ b/src/custom_nodes/horizontal_ocr/README.md @@ -29,7 +29,7 @@ make BASE_OS=redhat NODES=horizontal_ocr | Input name | Description | Shape | Precision | | ------------- |:-------------:| -----:| ------:| | image | Input image in an array format. Only batch size 1 is supported and images must have 3 channels. Resolution is configurable via parameters original_image_width and original_image_height. 
| `1,3,H,W` | FP32 | -| boxes | horizontal-text-detection model output `boxes` | `100,5` | FP32 | +| boxes | horizontal-text-detection model output `boxes`, where `X` is number of boxes and second dimension contains box info and confidence level | `X,5` | FP32 | # Custom node outputs diff --git a/src/custom_nodes/horizontal_ocr/horizontal_ocr.cpp b/src/custom_nodes/horizontal_ocr/horizontal_ocr.cpp index f87bcf2415..92e2ded9c4 100644 --- a/src/custom_nodes/horizontal_ocr/horizontal_ocr.cpp +++ b/src/custom_nodes/horizontal_ocr/horizontal_ocr.cpp @@ -29,7 +29,7 @@ static constexpr const char* TEXT_IMAGES_TENSOR_NAME = "text_images"; static constexpr const char* COORDINATES_TENSOR_NAME = "text_coordinates"; static constexpr const char* CONFIDENCE_TENSOR_NAME = "confidence_levels"; -bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale) { +static bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale) { const uint64_t outputBatch = boxes.size(); int channels = convertToGrayScale ? 1 : 3; @@ -75,7 +75,7 @@ bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector< return true; } -bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector& boxes) { +static bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector& boxes) { const uint64_t outputBatch = boxes.size(); uint64_t byteSize = sizeof(int32_t) * 4 * outputBatch; @@ -102,7 +102,7 @@ bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::ve return true; } -bool copy_scores_into_output(struct CustomNodeTensor* output, const std::vector& scores) { +static bool copy_scores_into_output(struct CustomNodeTensor* output, const std::vector& scores) { const uint64_t outputBatch = scores.size(); uint64_t byteSize = sizeof(float) * outputBatch; @@ -198,7 +198,7 @@ int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct Custo uint64_t _numDetections = boxesTensor->dims[0]; uint64_t _numItems = boxesTensor->dims[1]; - NODE_ASSERT(boxesTensor->dims[0] == 100, "boxes has dim 0 not equal to 100"); + NODE_ASSERT(boxesTensor->dimsCount == 2, "boxes shape needs to have 2 dimensions"); NODE_ASSERT(boxesTensor->dims[1] == 5, "boxes has dim 1 not equal to 5"); int numDetections = static_cast(_numDetections); int numItems = static_cast(_numItems); @@ -300,7 +300,7 @@ int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const stru (*info)[1].dimsCount = 2; (*info)[1].dims = (uint64_t*)malloc((*info)[1].dimsCount * sizeof(uint64_t)); NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); - (*info)[1].dims[0] = 100; + (*info)[1].dims[0] = 0; (*info)[1].dims[1] = 5; (*info)[1].precision = FP32; return 0; diff --git a/src/custom_nodes/image_transformation/README.md b/src/custom_nodes/image_transformation/README.md index 75a209496e..fd02b74a40 100644 --- a/src/custom_nodes/image_transformation/README.md +++ b/src/custom_nodes/image_transformation/README.md @@ -48,9 +48,9 @@ make BASE_OS=redhat NODES=image_transformation | target_image_color_order | Output image color order. 
If specified and differs from original_image_color_order, color order conversion will be performed | `BGR` | | | original_image_layout | Input image layout. This is required to determine image shape from input shape | | ✓ | | target_image_layout | Output image layout. If specified and differs from original_image_layout, layout conversion will be performed | | | -| scale | All values will be divided by this value. When `scale_values` is specified, this value is ignored. [read more](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html) | | | -| scale_values | Scale values to be used for the input image per channel. Input data will be divided by those values. Values should be provided in the same order as output image color order. [read more](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html) | | | -| mean_values | Mean values to be used for the input image per channel. Values will be substracted from each input image data value. Values should be provided in the same order as output image color order. [read more](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html) | | | +| scale | All values will be divided by this value. When `scale_values` is specified, this value is ignored. [read more](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Additional_Optimization_Use_Cases.html#specifying-mean-and-scale-values) | | | +| scale_values | Scale values to be used for the input image per channel. Input data will be divided by those values. Values should be provided in the same order as output image color order. [read more](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Additional_Optimization_Use_Cases.html#specifying-mean-and-scale-values) | | | +| mean_values | Mean values to be used for the input image per channel. Values will be substracted from each input image data value. Values should be provided in the same order as output image color order. [read more](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Additional_Optimization_Use_Cases.html#specifying-mean-and-scale-values) | | | | debug | Defines if debug messages should be displayed | false | | > **_NOTE:_** Substracting mean values is performed before division by scale values. 
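To make the note above concrete: each input value is first reduced by the per-channel mean and only then divided by the per-channel scale. Below is a minimal, self-contained sketch of that arithmetic (illustrative only — the example mean/scale values are hypothetical and the real node operates on whole image buffers):

```cpp
#include <array>
#include <cstddef>
#include <cstdio>

// Illustrative only: per-channel normalization as described above.
// Mean subtraction happens first, then division by the scale value.
static float normalize(float value, float mean, float scale) {
    return (value - mean) / scale;
}

int main() {
    const std::array<float, 3> meanValues{123.675f, 116.28f, 103.53f};  // hypothetical example values
    const std::array<float, 3> scaleValues{58.395f, 57.12f, 57.375f};   // hypothetical example values
    const std::array<float, 3> pixel{200.0f, 100.0f, 50.0f};            // one pixel in the output color order
    for (std::size_t c = 0; c < pixel.size(); ++c) {
        std::printf("channel %zu: %.4f\n", c, normalize(pixel[c], meanValues[c], scaleValues[c]));
    }
    return 0;
}
```

When only `scale` is provided (and `scale_values` is not), the same division is applied uniformly to every channel.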
diff --git a/src/custom_nodes/model_zoo_intel_object_detection/model_zoo_intel_object_detection.cpp b/src/custom_nodes/model_zoo_intel_object_detection/model_zoo_intel_object_detection.cpp index b7b59211fa..fcf91d2060 100644 --- a/src/custom_nodes/model_zoo_intel_object_detection/model_zoo_intel_object_detection.cpp +++ b/src/custom_nodes/model_zoo_intel_object_detection/model_zoo_intel_object_detection.cpp @@ -47,7 +47,7 @@ static constexpr const char* OUTPUT_IMAGES_INFO_DIMS_NAME = "images_info_dims"; static constexpr const char* OUTPUT_CONFIDENCES_INFO_DIMS_NAME = "confidences_info_dims"; static constexpr const char* OUTPUT_LABEL_IDS_INFO_DIMS_NAME = "label_ids_info_dims"; -bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale, CustomNodeLibraryInternalManager* internalManager) { +static bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale, CustomNodeLibraryInternalManager* internalManager) { const uint64_t outputBatch = boxes.size(); int channels = convertToGrayScale ? 1 : 3; uint64_t byteSize = sizeof(float) * targetImageHeight * targetImageWidth * channels * outputBatch; @@ -98,7 +98,7 @@ bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector< return true; } -bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector& detections, CustomNodeLibraryInternalManager* internalManager) { +static bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector& detections, CustomNodeLibraryInternalManager* internalManager) { const uint64_t outputBatch = detections.size(); uint64_t byteSize = sizeof(int32_t) * 4 * outputBatch; @@ -125,7 +125,7 @@ bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::ve return true; } -bool copy_confidences_into_output(struct CustomNodeTensor* output, const std::vector& confidences, CustomNodeLibraryInternalManager* internalManager) { +static bool copy_confidences_into_output(struct CustomNodeTensor* output, const std::vector& confidences, CustomNodeLibraryInternalManager* internalManager) { const uint64_t outputBatch = confidences.size(); uint64_t byteSize = sizeof(float) * outputBatch; @@ -148,7 +148,7 @@ bool copy_confidences_into_output(struct CustomNodeTensor* output, const std::ve return true; } -bool copy_label_ids_into_output(struct CustomNodeTensor* output, const std::vector& labelIds, CustomNodeLibraryInternalManager* internalManager) { +static bool copy_label_ids_into_output(struct CustomNodeTensor* output, const std::vector& labelIds, CustomNodeLibraryInternalManager* internalManager) { const uint64_t outputBatch = labelIds.size(); uint64_t byteSize = sizeof(int32_t) * outputBatch; diff --git a/src/custom_nodes/tokenizer/.gitignore b/src/custom_nodes/tokenizer/.gitignore new file mode 100644 index 0000000000..9bc2095cbb --- /dev/null +++ b/src/custom_nodes/tokenizer/.gitignore @@ -0,0 +1,4 @@ +build +common +lib +custom_node_interface.h diff --git a/src/custom_nodes/tokenizer/CMakeLists.txt b/src/custom_nodes/tokenizer/CMakeLists.txt new file mode 100644 index 0000000000..3c2ba69be5 --- /dev/null +++ b/src/custom_nodes/tokenizer/CMakeLists.txt @@ -0,0 +1,29 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed 
under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +cmake_minimum_required(VERSION 3.10) +project(node) + +include(ExternalProject) + +set(BLINGFIRE_SHA 5089d31914cbed7a24589e753bd6cd362a377fbb) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +add_subdirectory(src) + +if (WITH_TESTS) + add_subdirectory(test) +endif() diff --git a/src/custom_nodes/tokenizer/Dockerfile.redhat b/src/custom_nodes/tokenizer/Dockerfile.redhat new file mode 100644 index 0000000000..a6dffa2124 --- /dev/null +++ b/src/custom_nodes/tokenizer/Dockerfile.redhat @@ -0,0 +1,27 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM registry.access.redhat.com/ubi8/ubi:8.7 +RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && yum update -d6 -y && yum install -d6 -y gcc-c++ curl xz cmake git + +WORKDIR / +COPY ./common /custom_nodes/common +COPY ./src /custom_nodes/tokenizer/src +COPY ./test /custom_nodes/tokenizer/test +COPY ./CMakeLists.txt /custom_nodes/tokenizer/CMakeLists.txt +COPY custom_node_interface.h / +WORKDIR /custom_nodes/tokenizer/build +RUN cmake -DWITH_TESTS=1 .. && make -j`nproc` && ./test/detokenization_test && ./test/tokenization_test diff --git a/src/custom_nodes/tokenizer/Dockerfile.ubuntu b/src/custom_nodes/tokenizer/Dockerfile.ubuntu new file mode 100644 index 0000000000..332a04bf1b --- /dev/null +++ b/src/custom_nodes/tokenizer/Dockerfile.ubuntu @@ -0,0 +1,28 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM ubuntu:20.04 + +RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y build-essential curl cmake git + +WORKDIR / +COPY ./common /custom_nodes/common +COPY ./src /custom_nodes/tokenizer/src +COPY ./test /custom_nodes/tokenizer/test +COPY ./CMakeLists.txt /custom_nodes/tokenizer/CMakeLists.txt +COPY custom_node_interface.h / +WORKDIR /custom_nodes/tokenizer/build +RUN cmake -DWITH_TESTS=1 .. 
&& make -j`nproc` && ./test/detokenization_test && ./test/tokenization_test diff --git a/src/custom_nodes/tokenizer/Makefile b/src/custom_nodes/tokenizer/Makefile new file mode 100644 index 0000000000..69cf8aaaca --- /dev/null +++ b/src/custom_nodes/tokenizer/Makefile @@ -0,0 +1,40 @@ +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +HEADER_FILE_PATH ?= ../../custom_node_interface.h +COMMON_DIR ?= ../common + +BASE_OS ?= ubuntu + +.PHONY: all + +default: all + +all: +ifeq ($(NO_DOCKER_CACHE),true) + $(eval NO_CACHE_OPTION:=--no-cache) + @echo "Docker image will be rebuilt from scratch" +endif + @cp $(HEADER_FILE_PATH) . + @cp -r $(COMMON_DIR) ./ + echo "Building tokenizer" + docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) -t tokenizer_build_image:latest --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} --build-arg no_proxy=${no_proxy} . + mkdir -p ./lib/$(BASE_OS) + docker cp $$(docker create --rm tokenizer_build_image:latest):/custom_nodes/tokenizer/build/src/libdetokenizer.so ./lib/$(BASE_OS)/ + docker cp $$(docker create --rm tokenizer_build_image:latest):/custom_nodes/tokenizer/build/src/libtokenizer.so ./lib/$(BASE_OS)/ + echo "Built tokenizer" + @rm -rf ./common + @rm custom_node_interface.h diff --git a/src/custom_nodes/tokenizer/README.md b/src/custom_nodes/tokenizer/README.md new file mode 100644 index 0000000000..5eba47b0ba --- /dev/null +++ b/src/custom_nodes/tokenizer/README.md @@ -0,0 +1,102 @@ +# Custom node for text tokenization and detokenization for Large Language Models + +This custom node project compiles into 2 libraries: +- libtokenizer.so +- libdetokenizer.so + +It can be used in OVMS DAG to tokenize string input into tokens accepted by NLP model. The other library can be used to create string from tokens generated by NLP model. + +![diagram](diagram.svg) + +The nodes statically link [BlingFire from Microsoft](https://github.com/microsoft/BlingFire) library and use it for tokenization. + +# Supported models +All models which accept the tokens via 2 inputs: `input_ids` of shape `[B, N]` (where `B` is batch and `N` is max length of tokens per batch) and `attention_mask` of the same shape `[B, N]` which declare the padding using `0` and `1` values. The inputs must have `I64` precision. + +The model must output logits with 3 dimensions: `[B, N, K]`, where `K` represents vocabulary size. The precision of the logits should be `FP32`. + +Known models with such format: +- [GPT-2](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public/gpt-2) +- [GPT-J](https://huggingface.co/EleutherAI/gpt-j-6B) + +**NOTE:** Both models work with `gpt2.bin`/`gpt2.i2w` pretrained tokenization models from BlingFire repo. Pretrained tokenization models [can be found here](https://github.com/microsoft/BlingFire/tree/5089d31914cbed7a24589e753bd6cd362a377fbb/ldbsrc/ldb). 
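To make the expected request layout concrete, here is a small standalone sketch (illustrative only — the token IDs are made up and this is not part of the node code) of how two sequences of different length are padded into the `input_ids`/`attention_mask` pair such models consume:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Batch of B = 2 sequences padded to the longest length N = 4.
    // attention_mask marks real tokens with 1 and padding with 0, as described
    // above; the token ID values themselves are arbitrary examples.
    const std::size_t B = 2, N = 4;
    const std::vector<int64_t> inputIds = {
        101, 2023, 2003, 102,  // sequence 0: 4 real tokens
        101, 2307, 102, 0};    // sequence 1: 3 real tokens + 1 pad
    const std::vector<int64_t> attentionMask = {
        1, 1, 1, 1,
        1, 1, 1, 0};
    for (std::size_t b = 0; b < B; ++b) {
        for (std::size_t n = 0; n < N; ++n) {
            std::printf("[%zu,%zu] id=%lld mask=%lld\n", b, n,
                static_cast<long long>(inputIds[b * N + n]),
                static_cast<long long>(attentionMask[b * N + n]));
        }
    }
    return 0;
}
```

The tokenizer node produces exactly this pair on its outputs, so a GPT-2/GPT-J style model can be fed directly from the DAG.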
+ +# Building custom node library + +You can build the shared library of the custom node simply by running the following commands: +```bash +git clone https://github.com/openvinotoolkit/model_server && cd model_server/src/custom_nodes/tokenizer +make +``` +It will compile the library inside a docker container and save the results in `lib//` folder. + +You can also select base OS between UBI 8.6 (redhat) and Ubuntu 20.04 (ubuntu) by setting the `BASE_OS` environment variable. +```bash +make BASE_OS=redhat +``` + +It will compile the libraries: +```bash +ls lib/redhat -A1 + +libdetokenizer.so +libtokenizer.so +``` + + +# libtokenizer.so inputs + +| Input name | Description | Shape | Precision | +| ------------- |:-------------:| -----:| -----:| +| texts | 2D array of bytes, where each row represents a single, null-terminated sentence, padded with `\0` to the longest sentence in the batch. | `-1,-1` | U8 | + +Example with 2 strings - `abcd` and `ab`: +``` +[ + 'a', 'b', 'c', 'd', 0, + 'a', 'b', 0 , 0 , 0 +] +``` +OVMS supports automatic conversion from TensorflowServing/KServe proto to 2D U8 data matching the node input format. A short packing sketch is shown after the diagram below. + +# libtokenizer.so outputs +The outputs of this node match the inputs of GPT-like models. + +| Output name | Description | Shape | Precision | +| ------------- |:-------------:| -----:| -----:| +| input_ids | Returns 2D array of tokenized sentences padded to the longest token sequence in the batch. Padding info required by GPT models is contained in the `attention_mask` output | `-1,-1` | I64 | +| attention_mask | Returns padding information, 2D array of `1` and `0` where `0` indicates padding. Its shape is always identical to the `input_ids` output. | `-1,-1` | I64 | + +# libdetokenizer.so inputs + +| Input name | Description | Shape | Precision | +| ------------- |:-------------:| -----:| -----:| +| logits | Output from GPT-like model, the next token prediction tensor | `-1,-1,-1` | FP32 | +| input_ids | Output from tokenizer library required for knowledge of previous tokens (context) | `-1,-1` | I64 | +| attention_mask | Output from tokenizer library required for knowledge of previous tokens (context) | `-1,-1` | I64 | + + +# libdetokenizer.so outputs + +| Output name | Description | Shape | Precision | +| ------------- |:-------------:| -----:| -----:| +| texts | Returns 2D array of sentences padded to the longest sentence in the batch. The sentences contain the original content plus the autocompletion provided by the prediction of the subsequent token | `-1,-1` | U8 | + +OVMS supports automatic conversion from `U8 2D` format to TensorflowServing/KServe proto. + +# Custom node parameters +Parameters can be defined in the pipeline definition in the OVMS configuration file. [Read more](https://github.com/openvinotoolkit/model_server/blob/releases/2022/1/docs/custom_node_development.md) about node parameters. + +## Parameters for libtokenizer.so +| Parameter | Description | Default | Required | +| ------------- | ------------- | ------------- | ------------ | +| model_path | Local path to [tokenization model](https://github.com/microsoft/BlingFire/tree/5089d31914cbed7a24589e753bd6cd362a377fbb/ldbsrc/ldb) in BlingFire format | | ✓ | +| max_ids_arr_length | Maximum number of tokens to be generated from input sentences. If the input string exceeds this amount, the generated tokens are cut. 
| 1024 | | +| debug | Defines if debug messages should be displayed | false | | + +## Parameters for libdetokenizer.so +| Parameter | Description | Default | Required | +| ------------- | ------------- | ------------- | ------------ | +| model_path | Local path to [detokenization model](https://github.com/microsoft/BlingFire/tree/5089d31914cbed7a24589e753bd6cd362a377fbb/ldbsrc/ldb) in BlingFire format | | ✓ | +| max_buffer_length | Maximum size of text generated by detokenization. This includes the context (the sentence before autocompletion by the GPT model). If the generated text is larger than the buffer, it is truncated. This value should generally be larger than `max_ids_arr_length` in the tokenization node | 4096 | | +| debug | Defines if debug messages should be displayed | false | | diff --git a/src/custom_nodes/tokenizer/diagram.svg b/src/custom_nodes/tokenizer/diagram.svg new file mode 100644 index 0000000000..0ada1d0444 --- /dev/null +++ b/src/custom_nodes/tokenizer/diagram.svg @@ -0,0 +1,4 @@ + + + +
[diagram.svg: SVG content not reproducible as text. The diagram shows the DAG flow: gRPC/REST → entry → tokenizer (libtokenizer.so) → gpt-j-6b (OpenVINO model) → detokenizer (libdetokenizer.so) → exit → gRPC/REST, with example completions "OpenVINO is" → "OpenVINO is awesome" and "Neurons are fascin" → "Neurons are fascinating".]
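As a companion to the diagram and the `texts` input description earlier in this README, here is a small standalone sketch (illustrative only, not the node implementation) of packing a batch of strings into the row-major, null-terminated, `\0`-padded U8 buffer that libtokenizer.so expects:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

int main() {
    // Pack each sentence into its own row, padded with '\0' up to the length
    // of the longest sentence plus one terminating byte, e.g. "abcd" and "ab"
    // become a [2,5] U8 tensor: 'a','b','c','d',0, 'a','b',0,0,0.
    const std::vector<std::string> sentences{"abcd", "ab"};
    std::size_t maxLen = 0;
    for (const auto& s : sentences) maxLen = std::max(maxLen, s.size());
    const std::size_t width = maxLen + 1;  // room for the null terminator
    std::vector<uint8_t> texts(sentences.size() * width, 0);
    for (std::size_t row = 0; row < sentences.size(); ++row) {
        std::memcpy(texts.data() + row * width, sentences[row].data(), sentences[row].size());
    }
    for (std::size_t i = 0; i < texts.size(); ++i) {
        std::printf("%3d%s", texts[i], (i + 1) % width == 0 ? "\n" : " ");
    }
    return 0;
}
```

The detokenizer writes its `texts` output back in the same row-major, null-terminated layout.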
\ No newline at end of file diff --git a/src/custom_nodes/tokenizer/src/CMakeLists.txt b/src/custom_nodes/tokenizer/src/CMakeLists.txt new file mode 100644 index 0000000000..1d97e5679a --- /dev/null +++ b/src/custom_nodes/tokenizer/src/CMakeLists.txt @@ -0,0 +1,51 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +cmake_minimum_required(VERSION 3.10) +project(tokenizer) + +include(ExternalProject) + +set(CMAKE_CXX_STANDARD 17) + +set(BLINGFIRE_INSTALL_DIR ${CMAKE_BINARY_DIR}/external/blingfire) + +ExternalProject_Add(blingfire + PREFIX blingfire + GIT_REPOSITORY https://github.com/microsoft/BlingFire.git + GIT_TAG ${BLINGFIRE_SHA} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BLINGFIRE_INSTALL_DIR} +) + +set(BLINGFIRE_STATIC_LIB_DIR ${CMAKE_BINARY_DIR}/src/blingfire/src/blingfire-build) +set(BLINGFIRE_STATIC_LIBS + ${BLINGFIRE_STATIC_LIB_DIR}/libbingfirtinydll_static.a + ${BLINGFIRE_STATIC_LIB_DIR}/libfsaClient.a) + +set(UTIL_DIRS + ../../common/ # for common/utils.hpp + ../../../) # for custom_node_interface.h + +add_library(tokenizer SHARED model.cpp tokenizer.cpp) +target_include_directories(tokenizer PRIVATE ${BLINGFIRE_INSTALL_DIR}/include ${UTIL_DIRS}) +target_link_libraries(tokenizer ${BLINGFIRE_STATIC_LIBS} stdc++fs) +target_compile_options(tokenizer PRIVATE -fstack-protector -fno-omit-frame-pointer -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security) +add_dependencies(tokenizer blingfire) + +add_library(detokenizer SHARED model.cpp detokenizer.cpp) +target_include_directories(detokenizer PRIVATE ${BLINGFIRE_INSTALL_DIR}/include ${UTIL_DIRS}) +target_link_libraries(detokenizer ${BLINGFIRE_STATIC_LIBS} stdc++fs) +target_compile_options(detokenizer PRIVATE -fstack-protector -fno-omit-frame-pointer -fno-strict-overflow -Wall -Wno-unknown-pragmas -Werror -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security -Werror=format-security) +add_dependencies(detokenizer blingfire) diff --git a/src/custom_nodes/tokenizer/src/detokenizer.cpp b/src/custom_nodes/tokenizer/src/detokenizer.cpp new file mode 100644 index 0000000000..5a018886ca --- /dev/null +++ b/src/custom_nodes/tokenizer/src/detokenizer.cpp @@ -0,0 +1,272 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include +#include +#include + +#include "custom_node_interface.h" // NOLINT +#include "model.hpp" +#include "utils.hpp" + +#define DEBUG_MSG(str) \ + if (debugMode) { \ + std::cout << "[detokenizer] " << str << std::endl; \ + } + +#define INPUT_NAME_LOGITS "logits" +#define INPUT_NAME_PREVIOUS_TOKENS "input_ids" +#define INPUT_NAME_PREVIOUS_ATTENTION "attention_mask" + +#define OUTPUT_NAME_TEXTS "texts" + +// Size of memory allocation on the heap for generated text. +// If the size of the output is larger than this value, the output is truncated. +// Consider using memory pool. +#define DEFAULT_MAX_BUF_LEN 4096 + +using namespace custom_nodes::tokenizer; + +int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { + bool debugMode = get_string_parameter("debug", params, paramsCount) == "true"; + std::string modelPath = get_string_parameter("model_path", params, paramsCount, ""); + NODE_ASSERT(!modelPath.empty(), "model_path cannot be empty"); + try { + auto cnlim = std::make_unique(modelPath, debugMode); + if (!cnlim->isValid()) + throw std::exception(); + *customNodeLibraryInternalManager = cnlim.release(); + } catch (...) { + std::cerr << "[detokenizer] initialize() fail: Cannot load tokenization model from path: " << modelPath << std::endl; + return 1; + } + return 0; +} + +int deinitialize(void* customNodeLibraryInternalManager) { + if (customNodeLibraryInternalManager != nullptr) { + BlingFireModel* manager = static_cast(customNodeLibraryInternalManager); + delete manager; + } + return 0; +} + +static int retrieveInputs( + // in + const struct CustomNodeTensor* inputs, + int inputsCount, + // out + const CustomNodeTensor** logitsTensor, + const CustomNodeTensor** inputIdsTensor, + const CustomNodeTensor** attentionMaskTensor) { + for (int i = 0; i < inputsCount; i++) { + if (std::strcmp(inputs[i].name, INPUT_NAME_LOGITS) == 0) { + *logitsTensor = &(inputs[i]); + } else if (std::strcmp(inputs[i].name, INPUT_NAME_PREVIOUS_TOKENS) == 0) { + *inputIdsTensor = &(inputs[i]); + } else if (std::strcmp(inputs[i].name, INPUT_NAME_PREVIOUS_ATTENTION) == 0) { + *attentionMaskTensor = &(inputs[i]); + } else { + std::cerr << "Unrecognized input: " << inputs[i].name << std::endl; + return 1; + } + } + return 0; +} + +static int validateInputs( + const CustomNodeTensor* logitsTensor, + const CustomNodeTensor* inputIdsTensor, + const CustomNodeTensor* attentionMaskTensor) { + NODE_ASSERT(logitsTensor != nullptr, "Missing " INPUT_NAME_LOGITS " input"); + NODE_ASSERT(logitsTensor->precision == FP32, INPUT_NAME_LOGITS " input is not FP32"); + NODE_ASSERT(logitsTensor->dimsCount == 3, "input " INPUT_NAME_LOGITS " shape must have 3 dimensions"); + NODE_ASSERT(logitsTensor->dims[0] > 0, "input " INPUT_NAME_LOGITS " dimension 1 must be larger than 0"); + NODE_ASSERT(logitsTensor->dims[1] > 0, "input " INPUT_NAME_LOGITS " dimension 2 must be larger than 0"); + NODE_ASSERT(logitsTensor->dims[2] > 0, "input " INPUT_NAME_LOGITS " text dimension 3 must be larger than 0"); + + NODE_ASSERT(inputIdsTensor != nullptr, "Missing " INPUT_NAME_PREVIOUS_TOKENS " input"); + NODE_ASSERT(inputIdsTensor->precision == I64, INPUT_NAME_PREVIOUS_TOKENS " input is not I64"); + NODE_ASSERT(inputIdsTensor->dimsCount == 2, INPUT_NAME_PREVIOUS_TOKENS " shape must 
have 2 dimensions");
+    NODE_ASSERT(inputIdsTensor->dims[0] > 0, INPUT_NAME_PREVIOUS_TOKENS " dimension 1 must be larger than 0");
+    NODE_ASSERT(inputIdsTensor->dims[1] > 0, INPUT_NAME_PREVIOUS_TOKENS " dimension 2 must be larger than 0");
+
+    NODE_ASSERT(attentionMaskTensor != nullptr, "Missing " INPUT_NAME_PREVIOUS_ATTENTION " input");
+    NODE_ASSERT(attentionMaskTensor->precision == I64, INPUT_NAME_PREVIOUS_ATTENTION " input is not I64");
+    NODE_ASSERT(attentionMaskTensor->dimsCount == 2, INPUT_NAME_PREVIOUS_ATTENTION " shape must have 2 dimensions");
+    NODE_ASSERT(attentionMaskTensor->dims[0] > 0, INPUT_NAME_PREVIOUS_ATTENTION " dimension 1 must be larger than 0");
+    NODE_ASSERT(attentionMaskTensor->dims[1] > 0, INPUT_NAME_PREVIOUS_ATTENTION " dimension 2 must be larger than 0");
+
+    NODE_ASSERT(logitsTensor->dims[0] == inputIdsTensor->dims[0], INPUT_NAME_LOGITS " and " INPUT_NAME_PREVIOUS_TOKENS " need to have matching batch dimension");
+    NODE_ASSERT(logitsTensor->dims[0] == attentionMaskTensor->dims[0], INPUT_NAME_LOGITS " and " INPUT_NAME_PREVIOUS_ATTENTION " need to have matching batch dimension");
+
+    NODE_ASSERT(logitsTensor->dims[1] == inputIdsTensor->dims[1], INPUT_NAME_LOGITS " and " INPUT_NAME_PREVIOUS_TOKENS " need to have matching second dimension");
+    NODE_ASSERT(logitsTensor->dims[1] == attentionMaskTensor->dims[1], INPUT_NAME_LOGITS " and " INPUT_NAME_PREVIOUS_ATTENTION " need to have matching second dimension");
+    return 0;
+}
+
+// in: [-1, -1, 50400]
+// out: [Batch, MaxLength]
+int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    auto start = std::chrono::steady_clock::now();
+    bool debugMode = get_string_parameter("debug", params, paramsCount) == "true";
+    DEBUG_MSG("execute() start");
+    // Parameters reading
+    int maxBufferLength = get_int_parameter("max_buffer_length", params, paramsCount, DEFAULT_MAX_BUF_LEN);
+    NODE_ASSERT(maxBufferLength > 0, "max_buffer_length param must be larger than 0");
+
+    const CustomNodeTensor* logitsTensor = nullptr;
+    const CustomNodeTensor* inputIdsTensor = nullptr;
+    const CustomNodeTensor* attentionMaskTensor = nullptr;
+
+    NODE_ASSERT(retrieveInputs(inputs, inputsCount, &logitsTensor, &inputIdsTensor, &attentionMaskTensor) == 0, "retrieveInputs() failed");
+    NODE_ASSERT(validateInputs(logitsTensor, inputIdsTensor, attentionMaskTensor) == 0, "validateInputs() failed");
+
+    BlingFireModel* model = static_cast<BlingFireModel*>(customNodeLibraryInternalManager);
+
+    std::vector<std::string> results;
+    for (uint64_t batch = 0; batch < logitsTensor->dims[0]; batch++) {
+        // get previous tokens of current batch for context
+        DEBUG_MSG("get previous tokens of batch " << batch);
+        int64_t* inputIds = reinterpret_cast<int64_t*>(
+            inputIdsTensor->data +
+            batch * (inputIdsTensor->dims[1] * sizeof(int64_t)));
+        int64_t* attentionMask = reinterpret_cast<int64_t*>(
+            attentionMaskTensor->data +
+            batch * (attentionMaskTensor->dims[1] * sizeof(int64_t)));
+
+        int64_t* it = std::find(attentionMask, attentionMask + attentionMaskTensor->dims[1], 0);
+        std::ptrdiff_t distance = std::distance(attentionMask, it);
+        std::ptrdiff_t lastNonZeroIndex = distance - 1;
+
+        // case for empty string being in a batch (attention mask all zeros)
+        if (lastNonZeroIndex < 0)
+            lastNonZeroIndex = 0;
+
+        std::vector<int64_t> previousTokens(inputIds, inputIds + distance);
+
+        // slice
+        DEBUG_MSG("slicing batch " << batch);
+        float* logits = reinterpret_cast<float*>(
+            logitsTensor->data +
+            batch * (logitsTensor->dims[1] * logitsTensor->dims[2] * sizeof(float)) +  // offset by batch
+            (lastNonZeroIndex * logitsTensor->dims[2] * sizeof(float)));  // offset to get last element of second dimension
+
+        // argmax
+        DEBUG_MSG("argmax batch " << batch);
+        float* result = std::max_element(logits, logits + logitsTensor->dims[2]);
+        int64_t token = std::distance(logits, result);
+        previousTokens.push_back(token);
+
+        // detokenize
+        DEBUG_MSG("detokenizing token batch " << batch);
+        auto text = model->detokenize(previousTokens, maxBufferLength);
+        DEBUG_MSG("detokenized token: (" << token << ") to: (" << text << ") for batch " << batch);
+        results.emplace_back(std::move(text));
+    }
+
+    DEBUG_MSG("getting max string length");
+    size_t maxStringLength = 0;
+    for (const auto& str : results) {
+        maxStringLength = std::max(maxStringLength, str.size());
+    }
+    size_t width = maxStringLength + 1;
+
+    DEBUG_MSG("preparing output tensor");
+    *outputsCount = 1;
+    *outputs = (struct CustomNodeTensor*)malloc(*outputsCount * sizeof(CustomNodeTensor));
+    if ((*outputs) == nullptr) {
+        std::cerr << "malloc has failed" << std::endl;
+        return 1;
+    }
+
+    // Outputs allocation
+    CustomNodeTensor& output = (*outputs)[0];
+    output.name = OUTPUT_NAME_TEXTS;
+    output.dataBytes = width * results.size();
+    output.data = (uint8_t*)malloc(output.dataBytes);
+    NODE_ASSERT(output.data != nullptr, "malloc has failed");
+    output.dimsCount = 2;
+    output.dims = (uint64_t*)malloc(output.dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output.dims != nullptr, "malloc has failed");
+    output.dims[0] = results.size();
+    output.dims[1] = width;
+    output.precision = U8;
+
+    DEBUG_MSG("writing output");
+    for (size_t i = 0; i < results.size(); i++) {
+        std::memcpy(output.data + i * width, results[i].data(), results[i].size());
+        output.data[i * width + results[i].size()] = 0;
+    }
+    DEBUG_MSG("execute() end");
+    auto end = std::chrono::steady_clock::now();
+    DEBUG_MSG("execute() end; took " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f << " ms");
+    return 0;
+}
+
+int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    *infoCount = 3;
+    *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo));
+    NODE_ASSERT((*info) != nullptr, "malloc has failed");
+
+    (*info)[0].name = INPUT_NAME_LOGITS;
+    (*info)[0].dimsCount = 3;
+    (*info)[0].dims = (uint64_t*)malloc((*info)[0].dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed");
+    (*info)[0].dims[0] = -1;
+    (*info)[0].dims[1] = -1;
+    (*info)[0].dims[2] = -1;
+    (*info)[0].precision = FP32;
+
+    (*info)[1].name = INPUT_NAME_PREVIOUS_TOKENS;
+    (*info)[1].dimsCount = 2;
+    (*info)[1].dims = (uint64_t*)malloc((*info)[1].dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed");
+    (*info)[1].dims[0] = -1;
+    (*info)[1].dims[1] = -1;
+    (*info)[1].precision = I64;
+
+    (*info)[2].name = INPUT_NAME_PREVIOUS_ATTENTION;
+    (*info)[2].dimsCount = 2;
+    (*info)[2].dims = (uint64_t*)malloc((*info)[2].dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(((*info)[2].dims) != nullptr, "malloc has failed");
+    (*info)[2].dims[0] = -1;
+    (*info)[2].dims[1] = -1;
+    (*info)[2].precision = I64;
+    return 0;
+}
+
+int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    *infoCount = 1;
+    *info = (struct
CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + NODE_ASSERT((*info) != nullptr, "malloc has failed"); + + (*info)[0].name = OUTPUT_NAME_TEXTS; + (*info)[0].dimsCount = 2; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = -1; + (*info)[0].dims[1] = -1; + (*info)[0].precision = U8; + + return 0; +} + +int release(void* ptr, void* customNodeLibraryInternalManager) { + free(ptr); + return 0; +} diff --git a/src/custom_nodes/tokenizer/src/model.cpp b/src/custom_nodes/tokenizer/src/model.cpp new file mode 100644 index 0000000000..3875e4287d --- /dev/null +++ b/src/custom_nodes/tokenizer/src/model.cpp @@ -0,0 +1,75 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "model.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blingfiretokdll.h" // NOLINT + +namespace custom_nodes { +namespace tokenizer { + +static std::atomic maxId{0}; + +BlingFireModel::BlingFireModel(const std::string& modelPath, bool debug) : + id(maxId++), + debug(debug) { + if (!std::filesystem::exists(modelPath)) { + throw std::runtime_error("Model file does not exist: " + modelPath); + } + handle = BlingFire::LoadModel(modelPath.c_str()); + if (debug) { + std::cout << "[BlingFireModel] [" << id << "] Model loaded from: " << modelPath << std::endl; + } +} + +BlingFireModel::~BlingFireModel() { + if (handle) { + BlingFire::FreeModel(handle); + if (debug) { + std::cout << "[BlingFireModel] [" << id << "] Model unloaded." 
<< std::endl;
+        }
+    }
+}
+
+std::vector<int64_t> BlingFireModel::tokenize(const std::string& text, int maxIdsArrLength) {
+    auto ids = std::make_unique<int32_t[]>(maxIdsArrLength);
+    const int idsLength = BlingFire::TextToIds(handle, text.c_str(), text.size(), ids.get(), maxIdsArrLength);
+    std::vector<int64_t> vec(idsLength);
+    std::transform(ids.get(), ids.get() + idsLength, vec.begin(),
+        [](int32_t val) { return static_cast<int64_t>(val); });
+    return vec;
+}
+
+std::string BlingFireModel::detokenize(const std::vector<int64_t>& tokens, int maxBufferLength, bool skipSpecialTokens) {
+    auto ids = std::make_unique<int32_t[]>(tokens.size());
+    std::transform(tokens.begin(), tokens.end(), ids.get(),
+        [](int64_t val) { return static_cast<int32_t>(val); });
+    std::string str(maxBufferLength + 1, '\0');  // +1 due to null ending
+    BlingFire::IdsToText(handle, ids.get(), tokens.size(), str.data(), maxBufferLength, skipSpecialTokens);
+    str.resize(std::strlen(str.data()));  // remove all remaining zero bytes
+    return str;
+}
+
+}  // namespace tokenizer
+}  // namespace custom_nodes
diff --git a/src/custom_nodes/tokenizer/src/model.hpp b/src/custom_nodes/tokenizer/src/model.hpp
new file mode 100644
index 0000000000..6c24bc33cc
--- /dev/null
+++ b/src/custom_nodes/tokenizer/src/model.hpp
@@ -0,0 +1,40 @@
+//*****************************************************************************
+// Copyright 2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+#include
+#include
+#include
+
+namespace custom_nodes {
+namespace tokenizer {
+
+class BlingFireModel {
+    int id;
+    void* handle = nullptr;
+    bool debug;
+
+public:
+    BlingFireModel(const std::string& modelPath, bool debug = false);
+    ~BlingFireModel();
+
+    bool isValid() const { return handle != nullptr; }
+
+    std::vector<int64_t> tokenize(const std::string& text, int maxIdsArrLength);
+    std::string detokenize(const std::vector<int64_t>& tokens, int maxBufferLength, bool skipSpecialTokens = false);
+};
+
+}  // namespace tokenizer
+}  // namespace custom_nodes
diff --git a/src/custom_nodes/tokenizer/src/tokenizer.cpp b/src/custom_nodes/tokenizer/src/tokenizer.cpp
new file mode 100644
index 0000000000..cd4b2bfbf9
--- /dev/null
+++ b/src/custom_nodes/tokenizer/src/tokenizer.cpp
@@ -0,0 +1,214 @@
+//*****************************************************************************
+// Copyright 2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include
+#include
+#include
+#include
+#include
+
+#define __STDC_WANT_LIB_EXT1__ 1  // to ensure existence of strnlen
+#include
+
+#include "custom_node_interface.h"  // NOLINT
+#include "model.hpp"
+#include "utils.hpp"
+
+#define INPUT_NAME_TEXTS "texts"
+
+#define OUTPUT_NAME_TOKENS "input_ids"
+#define OUTPUT_NAME_ATTENTION "attention_mask"
+
+// Size of memory allocation on the heap for generated tokens.
+// If the size of the output is larger than this value, the output is truncated.
+// Consider using memory pool.
+#define DEFAULT_MAX_ID_ARR_LEN 1024
+
+using namespace custom_nodes::tokenizer;
+
+#define DEBUG_MSG(str)                                   \
+    if (debugMode) {                                     \
+        std::cout << "[tokenizer] " << str << std::endl; \
+    }
+
+int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) {
+    bool debugMode = get_string_parameter("debug", params, paramsCount) == "true";
+    std::string modelPath = get_string_parameter("model_path", params, paramsCount, "");
+    NODE_ASSERT(!modelPath.empty(), "model_path cannot be empty");
+    try {
+        auto cnlim = std::make_unique<BlingFireModel>(modelPath, debugMode);
+        if (!cnlim->isValid())
+            throw std::exception();
+        *customNodeLibraryInternalManager = cnlim.release();
+    } catch (...) {
+        std::cerr << "[tokenizer] initialize() fail: Cannot load tokenization model from path: " << modelPath << std::endl;
+        return 1;
+    }
+    return 0;
+}
+
+int deinitialize(void* customNodeLibraryInternalManager) {
+    if (customNodeLibraryInternalManager != nullptr) {
+        BlingFireModel* manager = static_cast<BlingFireModel*>(customNodeLibraryInternalManager);
+        delete manager;
+    }
+    return 0;
+}
+
+static int retrieveInputs(
+    // in
+    const struct CustomNodeTensor* inputs,
+    int inputsCount,
+    // out
+    const CustomNodeTensor** textTensor) {
+    for (int i = 0; i < inputsCount; i++) {
+        if (std::strcmp(inputs[i].name, INPUT_NAME_TEXTS) == 0) {
+            *textTensor = &(inputs[i]);
+        } else {
+            std::cerr << "Unrecognized input: " << inputs[i].name << std::endl;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static int validateInputs(const CustomNodeTensor* textTensor) {
+    NODE_ASSERT(textTensor != nullptr, "Missing " INPUT_NAME_TEXTS " input");
+    NODE_ASSERT(textTensor->precision == U8, INPUT_NAME_TEXTS " input is not U8");
+
+    NODE_ASSERT(textTensor->dimsCount == 2, INPUT_NAME_TEXTS " input shape must have 2 dimensions");
+    NODE_ASSERT(textTensor->dims[0] > 0, INPUT_NAME_TEXTS " input dimension 1 must be larger than 0 (number of texts)");
+    NODE_ASSERT(textTensor->dims[1] > 0, INPUT_NAME_TEXTS " input dimension 2 must be larger than 0 (max null terminated text length)");
+    return 0;
+}
+
+int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    auto start = std::chrono::steady_clock::now();
+    bool debugMode = get_string_parameter("debug", params, paramsCount) == "true";
+    DEBUG_MSG("execute() start");
+    // Parameters reading
+    int maxIdsArrLength = get_int_parameter("max_ids_arr_length", params, paramsCount, DEFAULT_MAX_ID_ARR_LEN);
+    NODE_ASSERT(maxIdsArrLength > 0, "max_ids_arr_length param must be larger than 0");
+
+    const CustomNodeTensor* textTensor = nullptr;
+
+    NODE_ASSERT(retrieveInputs(inputs, inputsCount, &textTensor) == 0, "retrieveInputs() failed");
+    NODE_ASSERT(validateInputs(textTensor) == 0, "validateInputs() failed");
+
+    BlingFireModel* model = static_cast<BlingFireModel*>(customNodeLibraryInternalManager);
+
+    *outputsCount = 2;
+    *outputs = (struct CustomNodeTensor*)malloc(*outputsCount * sizeof(CustomNodeTensor));
+    if ((*outputs) == nullptr) {
+        std::cerr << "malloc has failed" << std::endl;
+        return 1;
+    }
+
+    std::vector<std::vector<int64_t>> ids(textTensor->dims[0]);
+    // For each batch, sequentially
+    for (uint64_t batch = 0; batch < textTensor->dims[0]; batch++) {
+        DEBUG_MSG("tokenizing batch " << batch);
+        const char* strStart = (const char*)textTensor->data + batch * textTensor->dims[1];
+        std::string text(strStart, strnlen(strStart, textTensor->dims[1]));
+        ids[batch] = model->tokenize(text, maxIdsArrLength);
+        DEBUG_MSG("tokenized batch " << batch << "; of string: " << text);
+    }
+
+    DEBUG_MSG("getting max token size");
+    size_t maxTokenSize = 0;
+    for (const auto& id : ids) {
+        maxTokenSize = std::max(maxTokenSize, id.size());
+    }
+
+    DEBUG_MSG("preparing output tensors");
+    CustomNodeTensor& tokens = (*outputs)[0];
+    tokens.name = OUTPUT_NAME_TOKENS;
+    tokens.dataBytes = sizeof(int64_t) * maxTokenSize * ids.size();
+    tokens.data = (uint8_t*)malloc(tokens.dataBytes);
+    NODE_ASSERT(tokens.data != nullptr, "malloc has failed");
+    tokens.dimsCount = 2;
+    tokens.dims = (uint64_t*)malloc(tokens.dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(tokens.dims != nullptr, "malloc has failed");
+    tokens.dims[0] = ids.size();
+    tokens.dims[1] = maxTokenSize;
+    tokens.precision = I64;
+
+    CustomNodeTensor& attention = (*outputs)[1];
+    attention.name = OUTPUT_NAME_ATTENTION;
+    attention.dataBytes = sizeof(int64_t) * maxTokenSize * ids.size();
+    attention.data = (uint8_t*)malloc(attention.dataBytes);
+    NODE_ASSERT(attention.data != nullptr, "malloc has failed");
+    attention.dimsCount = 2;
+    attention.dims = (uint64_t*)malloc(attention.dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(attention.dims != nullptr, "malloc has failed");
+    attention.dims[0] = ids.size();
+    attention.dims[1] = maxTokenSize;
+    attention.precision = I64;
+
+    DEBUG_MSG("writing output");
+    for (size_t i = 0; i < ids.size(); i++) {
+        std::memcpy(tokens.data + i * maxTokenSize * sizeof(int64_t), ids[i].data(), ids[i].size() * sizeof(int64_t));
+        for (size_t j = 0; j < ids[i].size(); j++) {
+            ((int64_t*)attention.data)[i * maxTokenSize + j] = 1;
+        }
+        for (size_t j = ids[i].size(); j < maxTokenSize; j++) {
+            ((int64_t*)attention.data)[i * maxTokenSize + j] = 0;
+        }
+    }
+    auto end = std::chrono::steady_clock::now();
+    DEBUG_MSG("execute() end; took " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f << " ms");
+    return 0;
+}
+
+int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    *infoCount = 1;
+    *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo));
+    NODE_ASSERT((*info) != nullptr, "malloc has failed");
+
+    (*info)[0].name = INPUT_NAME_TEXTS;
+    (*info)[0].dimsCount = 2;
+    (*info)[0].dims = (uint64_t*)malloc((*info)[0].dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed");
+    (*info)[0].dims[0] = -1;
+    (*info)[0].dims[1] = -1;
+    (*info)[0].precision = U8;
+    return 0;
+}
+
+int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+    *infoCount = 2;
+    *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo));
+    NODE_ASSERT((*info) != nullptr, "malloc has failed");
+
+    (*info)[0].name = OUTPUT_NAME_TOKENS;
+    (*info)[0].dimsCount = 2;
+    (*info)[0].dims =
(uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = -1; + (*info)[0].dims[1] = -1; + (*info)[0].precision = I64; + + (*info)[1].name = OUTPUT_NAME_ATTENTION; + (*info)[1].dimsCount = 2; + (*info)[1].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); + (*info)[1].dims[0] = -1; + (*info)[1].dims[1] = -1; + (*info)[1].precision = I64; + return 0; +} + +int release(void* ptr, void* customNodeLibraryInternalManager) { + free(ptr); + return 0; +} diff --git a/src/custom_nodes/tokenizer/test/CMakeLists.txt b/src/custom_nodes/tokenizer/test/CMakeLists.txt new file mode 100644 index 0000000000..741bb7f835 --- /dev/null +++ b/src/custom_nodes/tokenizer/test/CMakeLists.txt @@ -0,0 +1,48 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +cmake_minimum_required(VERSION 3.10) +project(test) + +set(CMAKE_CXX_STANDARD 17) + +# Download and install Google Test +include(FetchContent) +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.11.0 +) +FetchContent_MakeAvailable(googletest) + +set(UTIL_DIRS + ../src/ # for model.hpp + ../../../) # for custom_node_interface.h + +file(DOWNLOAD + https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.bin + ${CMAKE_BINARY_DIR}/gpt2.bin) + +file(DOWNLOAD + https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.i2w + ${CMAKE_BINARY_DIR}/gpt2.i2w) + +add_executable(tokenization_test tokenization_test.cpp) +target_include_directories(tokenization_test PRIVATE ${UTIL_DIRS}) +target_link_libraries(tokenization_test gtest_main tokenizer) + +add_executable(detokenization_test detokenization_test.cpp) +target_include_directories(detokenization_test PRIVATE ${UTIL_DIRS}) +target_link_libraries(detokenization_test gtest_main detokenizer) diff --git a/src/custom_nodes/tokenizer/test/detokenization_test.cpp b/src/custom_nodes/tokenizer/test/detokenization_test.cpp new file mode 100644 index 0000000000..60f2902bad --- /dev/null +++ b/src/custom_nodes/tokenizer/test/detokenization_test.cpp @@ -0,0 +1,280 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include + +#include + +#include "custom_node_interface.h" // NOLINT +#include "model.hpp" + +using namespace custom_nodes::tokenizer; + +#define TEST_MODEL_FILE_PATH "./gpt2.i2w" + +#define INPUT_NAME_LOGITS "logits" +#define INPUT_NAME_PREVIOUS_TOKENS "input_ids" +#define INPUT_NAME_PREVIOUS_ATTENTION "attention_mask" + +#define OUTPUT_NAME_TEXTS "texts" + +TEST(DetokenizerTest, Run) { + BlingFireModel model(TEST_MODEL_FILE_PATH); + int maxBufferLen = 1024; + auto result = model.detokenize({23294, 241, 22174, 28618, 2515, 94, 31676}, maxBufferLen); + ASSERT_EQ(result, "こんにちは"); +} + +TEST(DetokenizerTest, Run_TooSmallBuffer) { + BlingFireModel model(TEST_MODEL_FILE_PATH); + int maxBufferLen = 4; + auto result = model.detokenize({23294, 241, 22174, 28618, 2515, 94, 31676}, maxBufferLen); + ASSERT_EQ(result, "こ"); +} + +TEST(DetokenizerTest, init_deinit) { + void* model = nullptr; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + int ret = initialize(&model, params, 1); + ASSERT_EQ(ret, 0); + ASSERT_NE(model, nullptr); + + ret = deinitialize(model); + ASSERT_EQ(ret, 0); + + model = nullptr; + params[0].value = "../invalid.bin"; + ret = initialize(&model, params, 1); + ASSERT_NE(ret, 0); + ASSERT_EQ(model, nullptr); + + ret = deinitialize(model); + ASSERT_EQ(ret, 0); +} + +TEST(DetokenizerTest, inputs_info) { + struct CustomNodeTensorInfo* info = nullptr; + int infoCount = 0; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + + BlingFireModel model(params[0].value); + + int ret = getInputsInfo(&info, &infoCount, params, 1, (void*)&model); + ASSERT_EQ(ret, 0); + ASSERT_EQ(infoCount, 3); + + ASSERT_EQ(std::strcmp(info[0].name, INPUT_NAME_LOGITS), 0); + ASSERT_EQ(info[0].dimsCount, 3); + ASSERT_EQ(info[0].dims[0], -1); + ASSERT_EQ(info[0].dims[1], -1); + ASSERT_EQ(info[0].dims[2], -1); + ASSERT_EQ(info[0].precision, FP32); + + ASSERT_EQ(std::strcmp(info[1].name, INPUT_NAME_PREVIOUS_TOKENS), 0); + ASSERT_EQ(info[1].dimsCount, 2); + ASSERT_EQ(info[1].dims[0], -1); + ASSERT_EQ(info[1].dims[1], -1); + ASSERT_EQ(info[1].precision, I64); + + ASSERT_EQ(std::strcmp(info[2].name, INPUT_NAME_PREVIOUS_ATTENTION), 0); + ASSERT_EQ(info[2].dimsCount, 2); + ASSERT_EQ(info[2].dims[0], -1); + ASSERT_EQ(info[2].dims[1], -1); + ASSERT_EQ(info[2].precision, I64); + + ret = release(info, (void*)&model); + ASSERT_EQ(ret, 0); +} + +TEST(DetokenizerTest, outputs_info) { + struct CustomNodeTensorInfo* info = nullptr; + int infoCount = 0; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + + BlingFireModel model(params[0].value); + + int ret = getOutputsInfo(&info, &infoCount, params, 1, (void*)&model); + ASSERT_EQ(ret, 0); + ASSERT_EQ(infoCount, 1); + + ASSERT_EQ(std::strcmp(info[0].name, "texts"), 0); + ASSERT_EQ(info[0].dimsCount, 2); + ASSERT_EQ(info[0].dims[0], -1); + ASSERT_EQ(info[0].dims[1], -1); + ASSERT_EQ(info[0].precision, U8); + + ret = release(info, (void*)&model); + ASSERT_EQ(ret, 0); +} + +static void prepare(std::vector data, std::vector shape, std::vector> previousTokens, struct CustomNodeTensor* tensors) { + // logits + struct CustomNodeTensor* tensor = &tensors[0]; + tensor->dataBytes = data.size() * sizeof(float); + tensor->data = (uint8_t*)malloc(tensor->dataBytes); + std::memcpy(tensor->data, 
reinterpret_cast(data.data()), tensor->dataBytes); + + tensor->dimsCount = shape.size(); + tensor->dims = (uint64_t*)malloc(tensor->dimsCount * sizeof(uint64_t)); + int i = 0; + for (size_t dim : shape) { + tensor->dims[i] = dim; + i++; + } + + tensor->precision = FP32; + tensor->name = INPUT_NAME_LOGITS; + // [2, 8, 50400] + + // input_ids + tensor = &tensors[1]; + tensor->dataBytes = shape[0] * shape[1] * sizeof(int64_t); + tensor->data = (uint8_t*)malloc(tensor->dataBytes); + for (int i = 0; i < shape[0]; i++) { + std::memcpy( + tensor->data + i * shape[1] * sizeof(int64_t), + reinterpret_cast(previousTokens[i].data()), previousTokens[i].size() * sizeof(int64_t)); + } + + tensor->dimsCount = 2; + tensor->dims = (uint64_t*)malloc(tensor->dimsCount * sizeof(uint64_t)); + tensor->dims[0] = shape[0]; + tensor->dims[1] = shape[1]; + + tensor->precision = I64; + tensor->name = INPUT_NAME_PREVIOUS_TOKENS; + // [2, 8] + + // attention_mask + tensor = &tensors[2]; + tensor->dataBytes = shape[0] * shape[1] * sizeof(int64_t); + tensor->data = (uint8_t*)malloc(tensor->dataBytes); + for (int i = 0; i < shape[0]; i++) { + for (int j = 0; j < shape[1]; j++) { + if (j < previousTokens[i].size()) { + reinterpret_cast(tensor->data)[i * shape[1] + j] = 1; + } else { + reinterpret_cast(tensor->data)[i * shape[1] + j] = 0; + } + } + } + + tensor->dimsCount = 2; + tensor->dims = (uint64_t*)malloc(tensor->dimsCount * sizeof(uint64_t)); + tensor->dims[0] = shape[0]; + tensor->dims[1] = shape[1]; + + tensor->precision = I64; + tensor->name = INPUT_NAME_PREVIOUS_ATTENTION; + // [2, 8] +} + +class DetokenizerFixtureTest : public ::testing::Test { +protected: + void run(std::vector data, std::vector shape, std::vector> previousTokens, std::vector& out) { + ASSERT_EQ(shape.size(), 3); + struct CustomNodeTensor inputs[3]; + struct CustomNodeTensor* outputs = nullptr; + int outputsCount = 0; + prepare(data, shape, previousTokens, inputs); + int ret = execute(inputs, 3, &outputs, &outputsCount, params, 3, model); + for (int i = 0; i < 3; i++) { + free(inputs[i].data); + free(inputs[i].dims); + } + ASSERT_EQ(ret, 0); + ASSERT_EQ(outputsCount, 1); + std::vector results; + results.resize(outputs->dims[0]); + for (int i = 0; i < outputsCount; i++) { + if (std::strcmp(outputs[i].name, "texts") == 0) { + for (int j = 0; j < outputs[i].dims[0]; j++) { + char* str = (char*)outputs[i].data + j * outputs[i].dims[1]; + results[j] = std::string(str); + } + } else { + FAIL() << "Unknown output name: " << outputs[i].name; + } + } + out = results; + ASSERT_EQ(release(outputs, model), 0); + } + void SetUp() override { + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + params[1].key = "max_buffer_length"; + params[1].value = "1024"; + params[2].key = "debug"; + params[2].value = "true"; + int ret = initialize(&model, params, 3); + ASSERT_EQ(ret, 0); + ASSERT_NE(model, nullptr); + } + void TearDown() override { + int ret = deinitialize(model); + ASSERT_EQ(ret, 0); + } + struct CustomNodeParam params[3]; + void* model = nullptr; +}; + +TEST_F(DetokenizerFixtureTest, execute) { + std::vector outputs; + + // single batch, single previous token + run({1.0, 2.0, 3.0, 1.5}, {1, 1, 4}, {{18435}}, outputs); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0], "Hello#"); + + // single batch, 3 previous tokens + outputs.clear(); + run({9.4, 0.2, -0.82, -0.74, 4.2, 1.9, 0.2, 0.95, /**/ 1.0, 2.0, 3.0, 1.5 /**/}, {1, 3, 4}, {{23294, 241, 22174}}, outputs); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0], "こん#"); + 
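+    // The expected strings above and below follow from how execute() works: it takes the logits row of
+    // the last attended position, argmaxes over the last dimension (size 4 in these tests), appends that
+    // id to the previous tokens and detokenizes the whole sequence. The concrete characters assume the
+    // gpt2.i2w vocabulary, where the lowest ids decode to printable ASCII (0 -> "!", 1 -> "\"", 2 -> "#"),
+    // e.g. {1.0, 2.0, 3.0, 1.5} peaks at index 2, so "#" is appended to {18435} ("Hello"), giving "Hello#".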
+ // single batch, 3 previous tokens, different token predicted + outputs.clear(); + run({9.4, 0.2, -0.82, -0.74, 4.2, 1.9, 12.2, 0.95, /**/ 0.46, 1.18, 1.16, 1.02 /**/}, {1, 3, 4}, {{23294, 241, 22174}}, outputs); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0], "こん\""); + + // 2 batches, 2 previous tokens + outputs.clear(); + run({9.4, 0.2, -0.82, -0.74, /*start 0*/ 0.46, 1.18, 1.16, 1.02 /*end 0*/, 4.2, 1.9, 0.2, 0.95, /*start 1*/ 1.0, 2.0, 3.0, 1.5 /*end 1*/}, {2, 2, 4}, {{18435, 995}, {18435, 995}}, outputs); + ASSERT_EQ(outputs.size(), 2); + ASSERT_EQ(outputs[0], "Hello world\""); + ASSERT_EQ(outputs[1], "Hello world#"); + + // 2 batches, different number of previous tokens (labeled by attention mask under the hood) + outputs.clear(); + run({9.4, 0.2, -0.82, -0.74, /*start 0*/ 0.46, 1.18, 1.16, 1.02 /*end 0*/, /*start 1*/ 4.2, 1.9, 0.2, 0.95, /*end 1*/ 1.0, 2.0, 3.0, 1.5}, {2, 2, 4}, {{18435, 995}, {18435}}, outputs); + ASSERT_EQ(outputs.size(), 2); + ASSERT_EQ(outputs[0], "Hello world\""); + ASSERT_EQ(outputs[1], "Hello!"); + + outputs.clear(); + run({9.4, 0.2, -0.82, -0.74, /*start 0*/ 0.46, 1.18, 1.16, 1.02 /*end 0*/, /*start 1*/ 4.2, 1.9, 0.2, 0.95, /*end 1*/ 1.0, 2.0, 3.0, 1.5}, {2, 2, 4}, {{18435, 995}, {}}, outputs); + ASSERT_EQ(outputs.size(), 2); + ASSERT_EQ(outputs[0], "Hello world\""); + ASSERT_EQ(outputs[1], "!"); +} diff --git a/src/custom_nodes/tokenizer/test/tokenization_test.cpp b/src/custom_nodes/tokenizer/test/tokenization_test.cpp new file mode 100644 index 0000000000..9a38d890eb --- /dev/null +++ b/src/custom_nodes/tokenizer/test/tokenization_test.cpp @@ -0,0 +1,232 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include + +#include + +#include "custom_node_interface.h" // NOLINT +#include "model.hpp" + +#define TEST_MODEL_FILE_PATH "./gpt2.bin" + +#define INPUT_NAME_TEXTS "texts" + +#define OUTPUT_NAME_TOKENS "input_ids" +#define OUTPUT_NAME_ATTENTION "attention_mask" + +using namespace custom_nodes::tokenizer; + +TEST(TokenizerTest, Run) { + BlingFireModel model(TEST_MODEL_FILE_PATH); + int maxIdArrLen = 1024; + auto result = model.tokenize("こんにちは", maxIdArrLen); + std::vector expected = {23294, 241, 22174, 28618, 2515, 94, 31676}; + ASSERT_EQ(result.size(), expected.size()); + for (int i = 0; i < result.size(); i++) { + EXPECT_EQ(result[i], expected[i]) << "expected: " << expected[i] << "; actual: " << result[i]; + } +} + +TEST(TokenizerTest, Run_TooSmallBuffer) { + BlingFireModel model(TEST_MODEL_FILE_PATH); + int maxIdArrLen = 4; + auto result = model.tokenize("こんにちは", maxIdArrLen); + std::vector expected = {23294, 241, 22174, 28618}; + ASSERT_EQ(result.size(), expected.size()); + for (int i = 0; i < result.size(); i++) { + EXPECT_EQ(result[i], expected[i]) << "expected: " << expected[i] << "; actual: " << result[i]; + } +} + +TEST(TokenizerTest, init_deinit) { + void* model = nullptr; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + int ret = initialize(&model, params, 1); + ASSERT_EQ(ret, 0); + ASSERT_NE(model, nullptr); + + ret = deinitialize(model); + ASSERT_EQ(ret, 0); + + model = nullptr; + params[0].value = "../invalid.bin"; + ret = initialize(&model, params, 1); + ASSERT_NE(ret, 0); + ASSERT_EQ(model, nullptr); + + ret = deinitialize(model); + ASSERT_EQ(ret, 0); +} + +TEST(TokenizerTest, inputs_info) { + struct CustomNodeTensorInfo* info = nullptr; + int infoCount = 0; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + + BlingFireModel model(params[0].value); + + int ret = getInputsInfo(&info, &infoCount, params, 1, (void*)&model); + ASSERT_EQ(ret, 0); + ASSERT_EQ(infoCount, 1); + ASSERT_EQ(std::strcmp(info[0].name, INPUT_NAME_TEXTS), 0); + ASSERT_EQ(info[0].dimsCount, 2); + ASSERT_EQ(info[0].dims[0], -1); + ASSERT_EQ(info[0].dims[1], -1); + ASSERT_EQ(info[0].precision, U8); + ret = release(info, (void*)&model); + ASSERT_EQ(ret, 0); +} + +TEST(TokenizerTest, outputs_info) { + struct CustomNodeTensorInfo* info = nullptr; + int infoCount = 0; + struct CustomNodeParam params[1]; + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + + BlingFireModel model(params[0].value); + + int ret = getOutputsInfo(&info, &infoCount, params, 1, (void*)&model); + ASSERT_EQ(ret, 0); + ASSERT_EQ(infoCount, 2); + + ASSERT_EQ(std::strcmp(info[0].name, OUTPUT_NAME_TOKENS), 0); + ASSERT_EQ(info[0].dimsCount, 2); + ASSERT_EQ(info[0].dims[0], -1); + ASSERT_EQ(info[0].dims[1], -1); + ASSERT_EQ(info[0].precision, I64); + + ASSERT_EQ(std::strcmp(info[1].name, OUTPUT_NAME_ATTENTION), 0); + ASSERT_EQ(info[1].dimsCount, 2); + ASSERT_EQ(info[1].dims[0], -1); + ASSERT_EQ(info[1].dims[1], -1); + ASSERT_EQ(info[1].precision, I64); + + ret = release(info, (void*)&model); + ASSERT_EQ(ret, 0); +} + +static void putStringsToTensor(std::vector strings, struct CustomNodeTensor& tensor) { + size_t maxStringLength = 0; + for (auto& str : strings) { + maxStringLength = std::max(str.size(), maxStringLength); + } + size_t width = maxStringLength + 1; + + tensor.dataBytes = strings.size() 
* width * sizeof(uint8_t); + tensor.data = (uint8_t*)malloc(tensor.dataBytes); + + int i = 0; + for (auto& str : strings) { + std::memcpy(tensor.data + i * width, str.c_str(), str.size()); + tensor.data[i * width + str.size()] = 0; + i++; + } + + tensor.dimsCount = 2; + tensor.dims = (uint64_t*)malloc(2 * sizeof(uint64_t)); + tensor.dims[0] = strings.size(); + tensor.dims[1] = width; + + tensor.precision = U8; + tensor.name = INPUT_NAME_TEXTS; +} + +class TokenizerFixtureTest : public ::testing::Test { +protected: + struct output { + std::vector tokens; + std::vector attention; + }; + void run(std::vector in, std::vector& out) { + struct CustomNodeTensor inputs[1]; + struct CustomNodeTensor* outputs = nullptr; + int outputsCount = 0; + putStringsToTensor(in, inputs[0]); + int ret = execute(inputs, 1, &outputs, &outputsCount, params, 3, model); + free(inputs[0].data); + free(inputs[0].dims); + ASSERT_EQ(ret, 0); + ASSERT_EQ(outputsCount, 2); + std::vector result; + result.resize(outputs->dims[0]); + for (int i = 0; i < outputsCount; i++) { + if (std::strcmp(outputs[i].name, OUTPUT_NAME_ATTENTION) == 0) { + for (int j = 0; j < outputs[i].dims[0]; j++) { + result[j].attention = std::vector( + (int64_t*)outputs[i].data + j * outputs[i].dims[1], + (int64_t*)outputs[i].data + j * outputs[i].dims[1] + outputs[i].dims[1]); + } + } else if (std::strcmp(outputs[i].name, OUTPUT_NAME_TOKENS) == 0) { + for (int j = 0; j < outputs[i].dims[0]; j++) { + result[j].tokens = std::vector( + (int64_t*)outputs[i].data + j * outputs[i].dims[1], + (int64_t*)outputs[i].data + j * outputs[i].dims[1] + outputs[i].dims[1]); + } + } else { + FAIL() << "Unknown output name: " << outputs[i].name; + } + } + out = result; + ASSERT_EQ(release(outputs, model), 0); + } + void SetUp() override { + params[0].key = "model_path"; + params[0].value = TEST_MODEL_FILE_PATH; + params[1].key = "max_ids_arr_length"; + params[1].value = "1024"; + params[2].key = "debug"; + params[2].value = "true"; + int ret = initialize(&model, params, 3); + ASSERT_EQ(ret, 0); + ASSERT_NE(model, nullptr); + } + void TearDown() override { + int ret = deinitialize(model); + ASSERT_EQ(ret, 0); + } + struct CustomNodeParam params[3]; + void* model = nullptr; +}; + +TEST_F(TokenizerFixtureTest, execute) { + std::vector outputs; + run({"", "Hello world!", "こんにちは"}, outputs); + ASSERT_EQ(outputs.size(), 3); + + // "" + ASSERT_EQ(outputs[0].tokens.size(), 7); + ASSERT_EQ(outputs[0].attention.size(), 7); + ASSERT_EQ(std::memcmp(outputs[0].attention.data(), std::vector{0, 0, 0, 0, 0, 0, 0}.data(), 7 * sizeof(int64_t)), 0); + + // "Hello world!" 
+    ASSERT_EQ(outputs[1].tokens.size(), 7);
+    ASSERT_EQ(outputs[1].attention.size(), 7);
+    ASSERT_EQ(std::memcmp(outputs[1].tokens.data(), std::vector<int64_t>{18435, 995, 0}.data(), 3 * sizeof(int64_t)), 0);
+    ASSERT_EQ(std::memcmp(outputs[1].attention.data(), std::vector<int64_t>{1, 1, 1, 0, 0, 0, 0}.data(), 7 * sizeof(int64_t)), 0);
+
+    // "こんにちは"
+    ASSERT_EQ(outputs[2].tokens.size(), 7);
+    ASSERT_EQ(outputs[2].attention.size(), 7);
+    ASSERT_EQ(std::memcmp(outputs[2].tokens.data(), std::vector<int64_t>{23294, 241, 22174, 28618, 2515, 94, 31676}.data(), 7 * sizeof(int64_t)), 0);
+    ASSERT_EQ(std::memcmp(outputs[2].attention.data(), std::vector<int64_t>{1, 1, 1, 1, 1, 1, 1}.data(), 7 * sizeof(int64_t)), 0);
+}
diff --git a/src/customloaderconfig.hpp b/src/customloaderconfig.hpp
index 8251acc2fb..fffe65580a 100644
--- a/src/customloaderconfig.hpp
+++ b/src/customloaderconfig.hpp
@@ -48,6 +48,11 @@ class CustomLoaderConfig {
      */
     std::string loaderConfigFile;
 
+    /**
+     * @brief Json config directory path
+     */
+    std::string rootDirectoryPath;
+
 public:
     /**
      * @brief Construct a new Custom Loader Config object
@@ -103,7 +108,16 @@ class CustomLoaderConfig {
      * @param libraryPath
      */
     void setLibraryPath(const std::string& libraryPath) {
-        this->libraryPath = libraryPath;
+        FileSystem::setPath(this->libraryPath, libraryPath, this->rootDirectoryPath);
+    }
+
+    /**
+     * @brief Set root directory path
+     *
+     * @param rootDirectoryPath
+     */
+    void setRootDirectoryPath(const std::string& rootDirectoryPath) {
+        this->rootDirectoryPath = rootDirectoryPath;
     }
 
     /**
@@ -135,6 +149,9 @@ class CustomLoaderConfig {
             this->setLibraryPath(v["library_path"].GetString());
         if (v.HasMember("loader_config_file"))
             this->setLoaderConfigFile(v["loader_config_file"].GetString());
+    } catch (std::logic_error& e) {
+        SPDLOG_DEBUG("Relative path error: {}", e.what());
+        return StatusCode::INTERNAL_ERROR;
     } catch (...)
{ SPDLOG_ERROR("There was an error parsing the custom loader config"); return StatusCode::JSON_INVALID; diff --git a/src/dags/custom_node_library_manager.cpp b/src/dags/custom_node_library_manager.cpp index 87acf0f80f..53bcde93cc 100644 --- a/src/dags/custom_node_library_manager.cpp +++ b/src/dags/custom_node_library_manager.cpp @@ -31,6 +31,11 @@ Status CustomNodeLibraryManager::loadLibrary(const std::string& name, const std: return StatusCode::PATH_INVALID; } + if (basePath.at(0) != '/') { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Path {} is relative, should have already been constructed as full path for library {}.", basePath, name); + return StatusCode::PATH_INVALID; + } + auto it = libraries.find(name); if (it != libraries.end() && it->second.basePath == basePath) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Custom node library name: {} is already loaded", name); diff --git a/src/dags/custom_node_output_allocator.cpp b/src/dags/custom_node_output_allocator.cpp index 0893e0da3b..b4e46d9c1f 100644 --- a/src/dags/custom_node_output_allocator.cpp +++ b/src/dags/custom_node_output_allocator.cpp @@ -19,6 +19,7 @@ #include "../logging.hpp" namespace ovms { + bool operator==(const CustomNodeTensor& t1, const CustomNodeTensor& t2) { return (t1.name == t2.name) && (t1.data == t2.data) && @@ -45,11 +46,5 @@ bool CustomNodeOutputAllocator::is_equal(const CustomNodeOutputAllocator& other) (nodeLibrary == other.nodeLibrary) && (tensor == other.tensor); } -bool CustomNodeOutputAllocator::is_equal(const AllocatorImpl& other) const { - const CustomNodeOutputAllocator* otherPtr = dynamic_cast(&other); - if (otherPtr == nullptr) { - return false; - } - return this->is_equal(*otherPtr); -} + } // namespace ovms diff --git a/src/dags/custom_node_output_allocator.hpp b/src/dags/custom_node_output_allocator.hpp index fbc1e87bcf..3c0bdf3d88 100644 --- a/src/dags/custom_node_output_allocator.hpp +++ b/src/dags/custom_node_output_allocator.hpp @@ -24,16 +24,16 @@ namespace ovms { bool operator==(const CustomNodeTensor& t1, const CustomNodeTensor& t2); -class CustomNodeOutputAllocator : public ov::AllocatorImpl { +class CustomNodeOutputAllocator { struct ::CustomNodeTensor tensor; NodeLibrary nodeLibrary; void* customNodeLibraryInternalManager; public: CustomNodeOutputAllocator(struct CustomNodeTensor tensor, NodeLibrary nodeLibrary, void* customNodeLibraryInternalManager); - void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)) override; - void deallocate(void* handle, const size_t bytes, size_t alignment = alignof(max_align_t)) override; + void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)); + void deallocate(void* handle, const size_t bytes, size_t alignment = alignof(max_align_t)); bool is_equal(const CustomNodeOutputAllocator& other) const; - bool is_equal(const AllocatorImpl& other) const override; }; + } // namespace ovms diff --git a/src/dags/customnodesession.cpp b/src/dags/customnodesession.cpp index 654ac838ec..891e050651 100644 --- a/src/dags/customnodesession.cpp +++ b/src/dags/customnodesession.cpp @@ -211,8 +211,7 @@ Status CustomNodeSession::createTensor(const struct CustomNodeTensor* tensor, ov error.str()); return StatusCode::NODE_LIBRARY_INVALID_CONTENT_SIZE; } - auto allocatorImpl = std::make_shared(*tensor, library, customNodeLibraryInternalManager); - auto allocator = ov::Allocator(allocatorImpl); + auto allocator = CustomNodeOutputAllocator(*tensor, library, customNodeLibraryInternalManager); try { switch (tensor->precision) { 
case CustomNodeTensorPrecision::FP32: diff --git a/src/dags/dlnodesession.cpp b/src/dags/dlnodesession.cpp index a4042e10d7..5c2a4a7554 100644 --- a/src/dags/dlnodesession.cpp +++ b/src/dags/dlnodesession.cpp @@ -154,7 +154,7 @@ Status DLNodeSession::validate(const ov::Tensor& tensor, const TensorInfo& tenso ss << "Node: " << getName() << " input: " << tensorInfo.getName() << " Invalid shape -" << " Expected: " << tensorInfo.getShape().toString() - << "; Actual: " << TensorInfo::shapeToString(dims); + << "; Actual: " << shapeToString(dims); const std::string details = ss.str(); SPDLOG_LOGGER_DEBUG(dag_executor_logger, details); return Status(StatusCode::INVALID_SHAPE, details); @@ -166,7 +166,7 @@ Status DLNodeSession::validate(const ov::Tensor& tensor, const TensorInfo& tenso ss << "Node: " << getName() << " input: " << tensorInfo.getName() << " Invalid shape -" << " Expected: " << tensorInfo.getShape().toString() - << "; Actual: " << TensorInfo::shapeToString(dims); + << "; Actual: " << shapeToString(dims); const std::string details = ss.str(); SPDLOG_LOGGER_DEBUG(dag_executor_logger, details); return Status(StatusCode::INVALID_SHAPE, details); diff --git a/src/dags/entry_node.cpp b/src/dags/entry_node.cpp index c64f6d31f8..902bea4b14 100644 --- a/src/dags/entry_node.cpp +++ b/src/dags/entry_node.cpp @@ -21,7 +21,6 @@ #include #include -#include "../binaryutils.hpp" #include "../capi_frontend/capi_utils.hpp" #include "../deserialization.hpp" #include "../kfs_frontend/kfs_utils.hpp" @@ -29,6 +28,7 @@ #include "../ov_utils.hpp" #include "../predict_request_validation_utils.hpp" #include "../profiler.hpp" +#include "../tensor_conversion.hpp" #include "../tfs_frontend/tfs_utils.hpp" #include "nodesession.hpp" diff --git a/src/dags/exit_node.cpp b/src/dags/exit_node.cpp index 70b32fa9d3..eea3766e51 100644 --- a/src/dags/exit_node.cpp +++ b/src/dags/exit_node.cpp @@ -64,7 +64,7 @@ template Status ExitNode::fetchResults(const TensorMap& inputTensors) { OutputGetter outputGetter(inputTensors); static const model_version_t version{1}; - return serializePredictResponse(outputGetter, pipelineName, version, this->outputsInfo, this->response, getOutputMapKeyName); + return serializePredictResponse(outputGetter, pipelineName, version, this->outputsInfo, this->response, getOutputMapKeyName, useSharedOutputContent); } template diff --git a/src/dags/node.cpp b/src/dags/node.cpp index 695e451b51..6c12eed8bc 100644 --- a/src/dags/node.cpp +++ b/src/dags/node.cpp @@ -23,8 +23,8 @@ #include "../logging.hpp" #include "../ov_utils.hpp" #include "../profiler.hpp" +#include "../shape.hpp" #include "../status.hpp" -#include "../tensorinfo.hpp" #include "nodesession.hpp" #include "tensormap.hpp" @@ -62,11 +62,11 @@ Status Node::fetchResults(session_key_t sessionId, SessionResults& nodeSessionOu OVMS_PROFILE_FUNCTION(); auto it = nodeSessions.find(sessionId); - auto& nodeSession = it->second; if (it == nodeSessions.end()) { SPDLOG_LOGGER_ERROR(dag_executor_logger, "Could not find session: {} for node: {}", sessionId, getName()); return StatusCode::UNKNOWN_ERROR; } + auto& nodeSession = it->second; auto status = fetchResults(*nodeSession, nodeSessionOutputs); if (status.ok() && demultiplexCount) { SPDLOG_LOGGER_DEBUG(dag_executor_logger, "Will demultiply node: {} outputs with demultiplyCount: {}", getName(), demultiplyCountSettingToString(demultiplexCount)); @@ -252,7 +252,7 @@ Status Node::demultiplyOutputs(SessionResults& nodeSessionOutputs) { if (dag_executor_logger->level() <= spdlog::level::debug) { 
std::stringstream ss; ss << "Node: " << getName() << " input demultiplied: " << tensorName - << "; Actual: " << TensorInfo::shapeToString(dividedTensor.get_shape()); + << "; Actual: " << shapeToString(dividedTensor.get_shape()); SPDLOG_LOGGER_DEBUG(dag_executor_logger, "{}", ss.str()); } auto sessionKey = newSessionMetadatas[i].getSessionKey(); diff --git a/src/dags/node_library_utils.cpp b/src/dags/node_library_utils.cpp index b17d6e3b42..01f2ffbb25 100644 --- a/src/dags/node_library_utils.cpp +++ b/src/dags/node_library_utils.cpp @@ -104,7 +104,7 @@ std::unique_ptr createCustomNodeTensorArray(const Ten return inputTensors; } -Status createTensorInfoMap(struct CustomNodeTensorInfo* info, int infoCount, std::map>& out, release_fn freeCallback, void* customNodeLibraryInternalManager) { +Status createTensorInfoMap(struct CustomNodeTensorInfo* info, int infoCount, std::map>& out, release_fn freeCallback, void* customNodeLibraryInternalManager) { if (info == nullptr) { return StatusCode::NODE_LIBRARY_OUTPUTS_CORRUPTED; } diff --git a/src/dags/node_library_utils.hpp b/src/dags/node_library_utils.hpp index 5c56c11487..b5accdeafe 100644 --- a/src/dags/node_library_utils.hpp +++ b/src/dags/node_library_utils.hpp @@ -35,6 +35,6 @@ CustomNodeTensorPrecision toCustomNodeTensorPrecision(ov::element::Type_t precis Precision toInferenceEnginePrecision(CustomNodeTensorPrecision precision); std::unique_ptr createCustomNodeParamArray(const std::unordered_map& paramMap); std::unique_ptr createCustomNodeTensorArray(const TensorMap& tensorMap, const std::unordered_map& tensorDims); -Status createTensorInfoMap(struct CustomNodeTensorInfo* info, int infoCount, std::map>& out, release_fn freeCallback, void* customNodeLibraryInternalManager); +Status createTensorInfoMap(struct CustomNodeTensorInfo* info, int infoCount, std::map>& out, release_fn freeCallback, void* customNodeLibraryInternalManager); } // namespace ovms diff --git a/src/dags/pipeline_factory.cpp b/src/dags/pipeline_factory.cpp index 6ebe55c0e1..2eaa4696eb 100644 --- a/src/dags/pipeline_factory.cpp +++ b/src/dags/pipeline_factory.cpp @@ -110,6 +110,7 @@ Status PipelineFactory::revalidatePipelines(ModelManager& manager) { } return firstErrorStatus; } + const std::vector PipelineFactory::getPipelinesNames() const { std::vector names; std::shared_lock lock(definitionsMtx); @@ -122,11 +123,11 @@ const std::vector PipelineFactory::getPipelinesNames() const { template Status PipelineFactory::create(std::unique_ptr& pipeline, const std::string& name, const RequestType* request, ResponseType* response, ModelManager& manager) const { + std::shared_lock lock(definitionsMtx); if (!definitionExists(name)) { SPDLOG_LOGGER_DEBUG(dag_executor_logger, "Pipeline with requested name: {} does not exist", name); return StatusCode::PIPELINE_DEFINITION_NAME_MISSING; } - std::shared_lock lock(definitionsMtx); auto& definition = *definitions.at(name); lock.unlock(); return definition.create(pipeline, request, response, manager); diff --git a/src/dags/pipelinedefinition.cpp b/src/dags/pipelinedefinition.cpp index 3760f94108..76452f36f2 100644 --- a/src/dags/pipelinedefinition.cpp +++ b/src/dags/pipelinedefinition.cpp @@ -39,6 +39,7 @@ #include "pipelinedefinitionunloadguard.hpp" namespace ovms { +const std::string PipelineDefinition::SCHEDULER_CLASS_NAME{"Pipeline"}; Status toNodeKind(const std::string& str, NodeKind& nodeKind) { if (str == DL_NODE_CONFIG_TYPE) { @@ -62,7 +63,7 @@ PipelineDefinition::PipelineDefinition(const std::string& pipelineName, nodeInfos(nodeInfos), 
connections(connections), reporter(std::make_unique(metricConfig, registry, pipelineName, VERSION)), - status(this->pipelineName) {} + status(SCHEDULER_CLASS_NAME, this->pipelineName) {} Status PipelineDefinition::validate(ModelManager& manager) { SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of pipeline: {}", getName()); @@ -275,7 +276,7 @@ Status PipelineDefinition::create(std::unique_ptr& pipeline, nodeResources.at(info.nodeName))); break; case NodeKind::EXIT: { - auto node = std::make_unique>(response, getOutputsInfo(), info.gatherFromNode, useSharedOutputContent(request), getName()); + auto node = std::make_unique>(response, getOutputsInfo(), info.gatherFromNode, useSharedOutputContentFn(request), getName()); exit = node.get(); nodes.emplace(info.nodeName, std::move(node)); break; @@ -1063,24 +1064,22 @@ const tensor_map_t PipelineDefinition::getOutputsInfo() const { return copy; } -static std::shared_ptr applyDemultiplexerShapeForTensor(const std::shared_ptr& tensorInfo, int32_t demultiplyCount) { +static std::shared_ptr applyDemultiplexerShapeForTensor(const std::shared_ptr& tensorInfo, int32_t demultiplyCount) { return tensorInfo->createCopyWithDemultiplexerDimensionPrefix(demultiplyCount ? Dimension(demultiplyCount) : Dimension::any()); } -static std::shared_ptr createOutputTensorInfoForPipeline(const std::string& mappedName, const std::shared_ptr& tensorInfo, const Shape& gatherShape, bool isConnectionFromDemultiplexer) { - std::shared_ptr newOwnedTensorInfo; +static std::shared_ptr createOutputTensorInfoForPipeline(const std::string& mappedName, const std::shared_ptr& tensorInfo, const Shape& gatherShape, bool isConnectionFromDemultiplexer) { if (gatherShape.size() == 0) { - newOwnedTensorInfo = std::make_shared(*tensorInfo); - newOwnedTensorInfo->setMappedName(mappedName); - return newOwnedTensorInfo; + return tensorInfo->createCopyWithNewMappedName(mappedName); } Shape newShape = tensorInfo->getShape(); if (isConnectionFromDemultiplexer) { newShape.erase(newShape.begin()); } newShape.insert(newShape.begin(), gatherShape.begin(), gatherShape.end()); + std::shared_ptr newOwnedTensorInfo; newOwnedTensorInfo = tensorInfo->createCopyWithNewShape(newShape); - newOwnedTensorInfo->setMappedName(mappedName); + newOwnedTensorInfo = newOwnedTensorInfo->createCopyWithNewMappedName(mappedName); return newOwnedTensorInfo; } diff --git a/src/dags/pipelinedefinition.hpp b/src/dags/pipelinedefinition.hpp index 54e00aea5c..e69d9d0cfe 100644 --- a/src/dags/pipelinedefinition.hpp +++ b/src/dags/pipelinedefinition.hpp @@ -82,8 +82,6 @@ class PipelineDefinition { private: mutable std::shared_mutex metadataMtx; std::atomic requestsHandlesCounter = 0; - std::shared_mutex loadMtx; - std::condition_variable loadedNotify; // Pipelines are not versioned and any available definition has constant version equal 1. 
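Note on the pipelinedefinition.cpp hunk above: TensorInfo objects are now passed around as pointers to const and are no longer mutated via setMappedName(); renaming and reshaping go through the copy-producing helpers instead. A minimal sketch of the resulting pattern, assuming only the two helpers visible in this diff and that both return shared pointers convertible to shared_ptr<const TensorInfo>:

    // Assumes the ovms tensorinfo.hpp / shape.hpp headers are on the include path.
    static std::shared_ptr<const ovms::TensorInfo> renameAndReshape(
        const std::shared_ptr<const ovms::TensorInfo>& info,
        const std::string& mappedName,
        const ovms::Shape& newShape) {
        // Each helper returns a fresh copy; the input TensorInfo is never modified.
        auto reshaped = info->createCopyWithNewShape(newShape);
        return reshaped->createCopyWithNewMappedName(mappedName);
    }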
@@ -188,6 +186,7 @@ class PipelineDefinition { } public: + static const std::string SCHEDULER_CLASS_NAME; Status waitForLoaded(std::unique_ptr& unloadGuard, const uint waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); }; } // namespace ovms diff --git a/src/dags/pipelinedefinitionstatus.cpp b/src/dags/pipelinedefinitionstatus.cpp index 48c68e16d2..5fb27479b0 100644 --- a/src/dags/pipelinedefinitionstatus.cpp +++ b/src/dags/pipelinedefinitionstatus.cpp @@ -188,8 +188,8 @@ StateKeeper RetiredState::handle(const RetireEvent& e) const { return {}; } -PipelineDefinitionStatus::PipelineDefinitionStatus(const std::string& name) : - MachineState(name) {} +PipelineDefinitionStatus::PipelineDefinitionStatus(const std::string& type, const std::string& name) : + MachineState(type, name) {} bool PipelineDefinitionStatus::isAvailable() const { auto state = getStateCode(); return (state == PipelineDefinitionStateCode::AVAILABLE) || diff --git a/src/dags/pipelinedefinitionstatus.hpp b/src/dags/pipelinedefinitionstatus.hpp index d4f33fb49f..59039f08a0 100644 --- a/src/dags/pipelinedefinitionstatus.hpp +++ b/src/dags/pipelinedefinitionstatus.hpp @@ -42,20 +42,22 @@ const std::string& pipelineDefinitionStateCodeToString(PipelineDefinitionStateCo template class MachineState { public: - MachineState(const std::string& name) : + MachineState(const std::string& type, const std::string& name) : + type(type), name(name) {} template void handle(const Event& event) { - SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Pipeline: {} state: {} handling: {}: {}", - name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, event.getDetails()); + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "{}: {} state: {} handling: {}: {}", + type, name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, event.getDetails()); try { std::visit([this, &event](auto state) { state->handle(event).execute(*this); }, currentState); } catch (std::logic_error& le) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Pipeline: {} state: {} handling: {} error: {}", name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, le.what()); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "{}: {} state: {} handling: {} error: {}", + type, name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, le.what()); throw; } - SPDLOG_LOGGER_INFO(modelmanager_logger, "Pipeline: {} state changed to: {} after handling: {}: {}", - name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, event.getDetails()); + SPDLOG_LOGGER_INFO(modelmanager_logger, "{}: {} state changed to: {} after handling: {}: {}", + type, name, pipelineDefinitionStateCodeToString(getStateCode()), event.name, event.getDetails()); } template @@ -73,6 +75,7 @@ class MachineState { } private: + const std::string type; const std::string& name; std::tuple allPossibleStates; std::variant currentState{&std::get<0>(allPossibleStates)}; @@ -216,7 +219,7 @@ struct RetiredState { class PipelineDefinitionStatus : public MachineState { public: - PipelineDefinitionStatus(const std::string& name); + PipelineDefinitionStatus(const std::string& type, const std::string& name); bool isAvailable() const; bool canEndLoaded() const; bool isRevalidationRequired() const; diff --git a/src/deserialization.cpp b/src/deserialization.cpp index 7b0092dee2..36cac0b9b8 100644 --- a/src/deserialization.cpp +++ b/src/deserialization.cpp @@ -41,7 +41,7 @@ Status InputSink::give(const std::string& name, ov::Tensor& t return status; } ov::Tensor 
makeTensor(const InferenceTensor& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); ov::Shape shape; for (const auto& dim : requestInput.getShape()) { @@ -52,7 +52,7 @@ ov::Tensor makeTensor(const InferenceTensor& requestInput, } ov::Tensor makeTensor(const tensorflow::TensorProto& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); ov::Shape shape; for (int i = 0; i < requestInput.tensor_shape().dim_size(); i++) { @@ -63,7 +63,7 @@ ov::Tensor makeTensor(const tensorflow::TensorProto& requestInput, } ov::Tensor makeTensor(const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo, + const std::shared_ptr& tensorInfo, const std::string& buffer) { OVMS_PROFILE_FUNCTION(); ov::Shape shape; @@ -75,12 +75,13 @@ ov::Tensor makeTensor(const ::KFSRequest::InferInputTensor& requestInput, return tensor; } ov::Tensor makeTensor(const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); ov::Shape shape; for (int i = 0; i < requestInput.shape_size(); i++) { shape.push_back(requestInput.shape().at(i)); } + ov::element::Type precision = tensorInfo->getOvPrecision(); ov::Tensor tensor(precision, shape); return tensor; diff --git a/src/deserialization.hpp b/src/deserialization.hpp index 15cb5006e3..e565b2366b 100644 --- a/src/deserialization.hpp +++ b/src/deserialization.hpp @@ -27,34 +27,41 @@ #include "tensorflow_serving/apis/prediction_service.grpc.pb.h" #pragma GCC diagnostic pop -#include "binaryutils.hpp" #include "inferencerequest.hpp" #include "inferencetensor.hpp" #include "kfs_frontend/kfs_utils.hpp" #include "profiler.hpp" #include "status.hpp" +#include "tensor_conversion.hpp" #include "tensorinfo.hpp" #include "tfs_frontend/tfs_utils.hpp" namespace ovms { +#define RETURN_IF_ERR(X) \ + { \ + auto status = (X); \ + if (!status.ok()) \ + return status; \ + } + ov::Tensor makeTensor(const tensorflow::TensorProto& requestInput, - const std::shared_ptr& tensorInfo); + const std::shared_ptr& tensorInfo); ov::Tensor makeTensor(const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo, + const std::shared_ptr& tensorInfo, const std::string& buffer); ov::Tensor makeTensor(const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo); + const std::shared_ptr& tensorInfo); ov::Tensor makeTensor(const InferenceTensor& requestInput, - const std::shared_ptr& tensorInfo); + const std::shared_ptr& tensorInfo); class ConcreteTensorProtoDeserializator { public: static ov::Tensor deserializeTensorProto( const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo, + const std::shared_ptr& tensorInfo, const std::string* buffer) { OVMS_PROFILE_FUNCTION(); if (nullptr != buffer) { @@ -217,7 +224,7 @@ class ConcreteTensorProtoDeserializator { static ov::Tensor deserializeTensorProto( const InferenceTensor& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); switch (tensorInfo->getPrecision()) { case ovms::Precision::FP64: @@ -247,7 +254,7 @@ class ConcreteTensorProtoDeserializator { } static ov::Tensor deserializeTensorProto( const tensorflow::TensorProto& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); switch (tensorInfo->getPrecision()) { case 
ovms::Precision::FP32: @@ -300,14 +307,14 @@ class ConcreteTensorProtoDeserializator { template ov::Tensor deserializeTensorProto( const tensorflow::TensorProto& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { return TensorProtoDeserializator::deserializeTensorProto(requestInput, tensorInfo); } template ov::Tensor deserializeTensorProto( const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo, + const std::shared_ptr& tensorInfo, const std::string* buffer) { return TensorProtoDeserializator::deserializeTensorProto(requestInput, tensorInfo, buffer); } @@ -315,7 +322,7 @@ ov::Tensor deserializeTensorProto( template ov::Tensor deserializeTensorProto( const InferenceTensor& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { return TensorProtoDeserializator::deserializeTensorProto(requestInput, tensorInfo); } @@ -348,14 +355,27 @@ Status deserializePredictRequest( auto& requestInput = requestInputItr->second; ov::Tensor tensor; - if (isNativeFileFormatUsed(requestInput)) { - SPDLOG_DEBUG("Request contains input in native file format: {}", name); - status = convertNativeFileFormatRequestTensorToOVTensor(requestInput, tensor, tensorInfo, nullptr); - if (!status.ok()) { - SPDLOG_DEBUG("Input native file format conversion failed."); - return status; + if (requiresPreProcessing(requestInput)) { + switch (tensorInfo->getPreProcessingHint()) { + case TensorInfo::ProcessingHint::STRING_1D_U8: + SPDLOG_DEBUG("Request contains input in 1D string format: {}", name); + RETURN_IF_ERR(convertStringRequestToOVTensor1D(requestInput, tensor, nullptr)); + break; + case TensorInfo::ProcessingHint::STRING_2D_U8: + SPDLOG_DEBUG("Request contains input in 2D string format: {}", name); + RETURN_IF_ERR(convertStringRequestToOVTensor2D(requestInput, tensor, nullptr)); + break; + case TensorInfo::ProcessingHint::IMAGE: + SPDLOG_DEBUG("Request contains input in native file format: {}", name); + RETURN_IF_ERR(convertNativeFileFormatRequestTensorToOVTensor(requestInput, tensor, tensorInfo, nullptr)); + break; + default: + SPDLOG_DEBUG("Request input: {} requires conversion but endpoint specifies no processing hint. Number of dimensions: {}; precision: {}; demultiplexer: {}", + name, tensorInfo->getShape().size(), toString(tensorInfo->getPrecision()), tensorInfo->isInfluencedByDemultiplexer()); + return StatusCode::NOT_IMPLEMENTED; } } else { + // Data Array Format tensor = deserializeTensorProto( requestInput, tensorInfo); } @@ -409,12 +429,24 @@ Status deserializePredictRequest( auto inputIndex = requestInputItr - request.inputs().begin(); auto bufferLocation = deserializeFromSharedInputContents ? 
&request.raw_input_contents()[inputIndex] : nullptr; - if (isNativeFileFormatUsed(*requestInputItr)) { - SPDLOG_DEBUG("Request contains input in native file format: {}", name); - status = convertNativeFileFormatRequestTensorToOVTensor(*requestInputItr, tensor, tensorInfo, bufferLocation); - if (!status.ok()) { - SPDLOG_DEBUG("Input native file format conversion failed."); - return status; + if (requiresPreProcessing(*requestInputItr)) { + switch (tensorInfo->getPreProcessingHint()) { + case TensorInfo::ProcessingHint::STRING_1D_U8: + SPDLOG_DEBUG("Request contains input in 1D string format: {}", name); + RETURN_IF_ERR(convertStringRequestToOVTensor1D(*requestInputItr, tensor, bufferLocation)); + break; + case TensorInfo::ProcessingHint::STRING_2D_U8: + SPDLOG_DEBUG("Request contains input in 2D string format: {}", name); + RETURN_IF_ERR(convertStringRequestToOVTensor2D(*requestInputItr, tensor, bufferLocation)); + break; + case TensorInfo::ProcessingHint::IMAGE: + SPDLOG_DEBUG("Request contains input in native file format: {}", name); + RETURN_IF_ERR(convertNativeFileFormatRequestTensorToOVTensor(*requestInputItr, tensor, tensorInfo, bufferLocation)); + break; + default: + SPDLOG_DEBUG("Request input: {} requires conversion but endpoint specifies no processing hint. Number of dimensions: {}; precision: {}; demultiplexer: {}", + name, tensorInfo->getShape().size(), toString(tensorInfo->getPrecision()), tensorInfo->isInfluencedByDemultiplexer()); + return StatusCode::NOT_IMPLEMENTED; } } else { tensor = deserializeTensorProto(*requestInputItr, tensorInfo, bufferLocation); diff --git a/src/example/SampleCpuExtension/Dockerfile.redhat b/src/example/SampleCpuExtension/Dockerfile.redhat index 94dd8d4822..a55a24929b 100644 --- a/src/example/SampleCpuExtension/Dockerfile.redhat +++ b/src/example/SampleCpuExtension/Dockerfile.redhat @@ -31,7 +31,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \ mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2022 + ln -s /opt/intel/l_openvino_* /opt/intel/openvino_2023 WORKDIR /workspace COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./ diff --git a/src/example/SampleCpuExtension/Dockerfile.ubuntu b/src/example/SampleCpuExtension/Dockerfile.ubuntu index 3c91b933ac..891baa1a79 100644 --- a/src/example/SampleCpuExtension/Dockerfile.ubuntu +++ b/src/example/SampleCpuExtension/Dockerfile.ubuntu @@ -15,7 +15,7 @@ # ARG BASE_IMAGE=ubuntu:20.04 -FROM $BASE_IMAGE as base_build +FROM $BASE_IMAGE ARG TEMP_DIR=/tmp/openvino_installer ARG DLDT_PACKAGE_URL @@ -32,7 +32,7 @@ RUN mkdir -p $TEMP_DIR && cd $TEMP_DIR/ && \ mkdir /opt/intel && \ tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \ - ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022 + ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023 WORKDIR /workspace COPY Makefile ov_extension.cpp CustomReluOp.cpp CustomReluOp.hpp ./ diff --git a/src/example/SampleCpuExtension/Makefile b/src/example/SampleCpuExtension/Makefile index 2fc9c7afb5..0bb5d4d44e 100644 --- a/src/example/SampleCpuExtension/Makefile +++ b/src/example/SampleCpuExtension/Makefile @@ -14,7 +14,7 @@ # limitations under the License. 
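Returning to the deserialization changes above: the single `isNativeFileFormatUsed` branch is replaced by a `RETURN_IF_ERR` macro plus a dispatch on `TensorInfo::ProcessingHint`. The following is a minimal, self-contained sketch of that pattern; `Status`, `ProcessingHint`, and the `convert*` helpers are stand-ins here, not the real types from `status.hpp` and `tensor_conversion.hpp`.

```cpp
// Sketch of the hint-based dispatch introduced in deserializePredictRequest().
#include <iostream>
#include <string>

enum class ProcessingHint { STRING_1D_U8, STRING_2D_U8, IMAGE, NO_PROCESSING };

struct Status {
    bool ok_{true};
    std::string msg{};
    bool ok() const { return ok_; }
};

// Mirrors the RETURN_IF_ERR macro added in deserialization.hpp.
#define RETURN_IF_ERR(X)   \
    {                      \
        auto status = (X); \
        if (!status.ok())  \
            return status; \
    }

Status convertString1D() { return {}; }  // stand-in for convertStringRequestToOVTensor1D
Status convertString2D() { return {}; }  // stand-in for convertStringRequestToOVTensor2D
Status convertImage() { return {}; }     // stand-in for convertNativeFileFormatRequestTensorToOVTensor

Status deserializeWithHint(ProcessingHint hint) {
    switch (hint) {
    case ProcessingHint::STRING_1D_U8:
        RETURN_IF_ERR(convertString1D());
        break;
    case ProcessingHint::STRING_2D_U8:
        RETURN_IF_ERR(convertString2D());
        break;
    case ProcessingHint::IMAGE:
        RETURN_IF_ERR(convertImage());
        break;
    default:
        return Status{false, "input requires conversion but endpoint specifies no processing hint"};
    }
    return Status{};
}

int main() {
    std::cout << deserializeWithHint(ProcessingHint::IMAGE).ok() << std::endl;  // prints 1
    return 0;
}
```

The real switch additionally logs the number of dimensions, precision, and demultiplexer flag before returning `NOT_IMPLEMENTED` when an input needs conversion but no hint applies.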
# -OPENVINO_PATH ?= /opt/intel/openvino_2022 +OPENVINO_PATH ?= /opt/intel/openvino_2023 all: $(eval SHELL:=/bin/bash) /usr/bin/g++ --version diff --git a/src/example/SampleCpuExtension/README.md b/src/example/SampleCpuExtension/README.md index 941048e477..25d67f043f 100644 --- a/src/example/SampleCpuExtension/README.md +++ b/src/example/SampleCpuExtension/README.md @@ -40,4 +40,4 @@ $ docker run -it --rm -p 9000:9000 -v `pwd`/lib/${BASE_OS}:/extension:ro -v `pwd --port 9000 --model_name resnet --model_path /resnet --cpu_extension /extension/libcustom_relu_cpu_extension.so ``` -> **NOTE**: Learn more about [OpenVINO extensibility](https://docs.openvino.ai/2022.2/openvino_docs_Extensibility_UG_Intro.html) +> **NOTE**: Learn more about [OpenVINO extensibility](https://docs.openvino.ai/2023.0/openvino_docs_Extensibility_UG_Intro.html) diff --git a/src/example/SampleCustomLoader/Dockerfile.ubuntu b/src/example/SampleCustomLoader/Dockerfile.ubuntu index 7a5ed4c96d..b818e6e980 100644 --- a/src/example/SampleCustomLoader/Dockerfile.ubuntu +++ b/src/example/SampleCustomLoader/Dockerfile.ubuntu @@ -14,7 +14,8 @@ # limitations under the License. # -FROM ubuntu:20.04 +ARG BASE_IMAGE=ubuntu:20.04 +FROM $BASE_IMAGE RUN apt update && apt install -y build-essential wget unzip make COPY Makefile /src/example/SampleCustomLoader/ COPY sampleCustLoader.cpp /src/example/SampleCustomLoader/ diff --git a/src/example/SampleCustomLoader/Makefile b/src/example/SampleCustomLoader/Makefile index b3ff290115..d57b40973b 100644 --- a/src/example/SampleCustomLoader/Makefile +++ b/src/example/SampleCustomLoader/Makefile @@ -16,6 +16,7 @@ HEADER_FILE_PATH ?= ../../customloaderinterface.hpp OUT_LIB = libsampleloader.so BASE_OS ?= ubuntu +BASE_IMAGE ?= ubuntu:20.04 LIB_INC = -Iinclude -I../../ -Irapidjson-1.1.0/include CFLAGS = -fPIC -Wl,-rpath,. -fpic OPS = -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow @@ -37,7 +38,7 @@ clean: docker_build: cp $(HEADER_FILE_PATH) ./customloaderinterface.hpp - docker build -f Dockerfile.$(BASE_OS) -t custom_loader_build_image:latest --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} . + docker build -f Dockerfile.$(BASE_OS) -t custom_loader_build_image:latest --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} --build-arg BASE_IMAGE=${BASE_IMAGE} . rm -Rf lib && mkdir ./lib docker cp $$(docker create --rm custom_loader_build_image:latest):/src/example/SampleCustomLoader/libsampleloader.so ./lib/ rm customloaderinterface.hpp diff --git a/src/binaryutils.hpp b/src/filesystem.cpp similarity index 69% rename from src/binaryutils.hpp rename to src/filesystem.cpp index 9b378d2fc5..5de2ec908a 100644 --- a/src/binaryutils.hpp +++ b/src/filesystem.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright 2021 Intel Corporation +// Copyright 2020-2023 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,15 +13,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
//***************************************************************************** -#pragma once -#include -#include - -#include "tensorinfo.hpp" +#include "filesystem.hpp" namespace ovms { -class Status; -template -Status convertNativeFileFormatRequestTensorToOVTensor(const TensorType& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); + +const std::string FileSystem::S3_URL_PREFIX = "s3://"; + +const std::string FileSystem::GCS_URL_PREFIX = "gs://"; + +const std::string FileSystem::AZURE_URL_FILE_PREFIX = "azfs://"; + +const std::string FileSystem::AZURE_URL_BLOB_PREFIX = "az://"; + } // namespace ovms diff --git a/src/filesystem.hpp b/src/filesystem.hpp index 8f14be21c4..d445e3ed78 100644 --- a/src/filesystem.hpp +++ b/src/filesystem.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -30,85 +31,86 @@ namespace ovms { namespace fs = std::filesystem; using files_list_t = std::set; + class FileSystem { public: /** * @brief Destroy the File System object - * + * */ virtual ~FileSystem() {} /** * @brief Check if given path or file exists - * - * @param path - * @param exists - * @return StatusCode + * + * @param path + * @param exists + * @return StatusCode */ virtual StatusCode fileExists(const std::string& path, bool* exists) = 0; /** * @brief Check if given path is a directory - * - * @param path - * @param is_dir - * @return StatusCode + * + * @param path + * @param is_dir + * @return StatusCode */ virtual StatusCode isDirectory(const std::string& path, bool* is_dir) = 0; /** * @brief Get the files and directories in given directory - * - * @param path - * @param contents - * @return StatusCode + * + * @param path + * @param contents + * @return StatusCode */ virtual StatusCode getDirectoryContents(const std::string& path, files_list_t* contents) = 0; /** * @brief Get only directories in given directory - * - * @param path - * @param subdirs - * @return StatusCode + * + * @param path + * @param subdirs + * @return StatusCode */ virtual StatusCode getDirectorySubdirs(const std::string& path, files_list_t* subdirs) = 0; /** * @brief Get only files in given directory - * - * @param path - * @param files - * @return StatusCode + * + * @param path + * @param files + * @return StatusCode */ virtual StatusCode getDirectoryFiles(const std::string& path, files_list_t* files) = 0; /** * @brief Read the content of the given file into a string - * - * @param path - * @param contents - * @return StatusCode + * + * @param path + * @param contents + * @return StatusCode */ virtual StatusCode readTextFile(const std::string& path, std::string* contents) = 0; /** * @brief Download a remote directory - * - * @param path - * @param local_path - * @return StatusCode + * + * @param path + * @param local_path + * @return StatusCode */ virtual StatusCode downloadFileFolder(const std::string& path, const std::string& local_path) = 0; /** - * @brief Download model versions - * - * @param path - * @param local_path - * @param versions - * @return StatusCode - */ + * @brief Download model versions + * + * @param path + * @param local_path + * @param versions + * @return StatusCode + */ virtual StatusCode downloadModelVersions(const std::string& path, std::string* local_path, const std::vector& versions) = 0; /** @@ -121,9 +123,9 @@ class FileSystem { virtual StatusCode deleteFileFolder(const std::string& path) = 0; /** * @brief Create a Temp Path - * - * @param local_path - * @return StatusCode + * + * @param local_path + * @return StatusCode */ 
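The URL prefixes that previously lived in individual storage backends (for example `GCSFileSystem::GCS_URL_PREFIX`) are now centralized as `FileSystem` constants, so callers can classify a base path without knowing which backend will handle it. A rough sketch of that check, with the prefixes copied from `filesystem.cpp` above (the diff adds an equivalent `FileSystem::isLocalFilesystem` helper a little further below):

```cpp
// Stand-in sketch of prefix-based remote/local path classification.
#include <array>
#include <iostream>
#include <string>

static bool isLocalPath(const std::string& basePath) {
    // rfind(prefix, 0) == 0 is the "starts with" idiom used in the diff.
    static const std::array<std::string, 4> remotePrefixes{"s3://", "gs://", "azfs://", "az://"};
    for (const auto& prefix : remotePrefixes) {
        if (basePath.rfind(prefix, 0) == 0)
            return false;
    }
    return true;
}

int main() {
    std::cout << isLocalPath("gs://bucket/model") << " "      // 0 (remote)
              << isLocalPath("/models/resnet") << std::endl;  // 1 (local)
    return 0;
}
```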
static StatusCode createTempPath(std::string* local_path) { std::string file_template = "/tmp/fileXXXXXX"; @@ -142,10 +144,63 @@ class FileSystem { } static bool isPathEscaped(const std::string& path) { - return std::string::npos != path.find("../") || std::string::npos != path.find("/.."); + std::size_t lhs = path.find("../"); + std::size_t rhs = path.find("/.."); + return (std::string::npos != lhs && lhs == 0) || (std::string::npos != rhs && rhs == path.length() - 3) || std::string::npos != path.find("/../"); + } + + static const std::string S3_URL_PREFIX; + + static const std::string GCS_URL_PREFIX; + + static const std::string AZURE_URL_FILE_PREFIX; + + static const std::string AZURE_URL_BLOB_PREFIX; + + static bool isLocalFilesystem(const std::string& basePath) { + if (basePath.rfind(S3_URL_PREFIX, 0) == 0) { + return false; + } + if (basePath.rfind(GCS_URL_PREFIX, 0) == 0) { + return false; + } + if (basePath.rfind(AZURE_URL_FILE_PREFIX, 0) == 0) { + return false; + } + if (basePath.rfind(AZURE_URL_BLOB_PREFIX, 0) == 0) { + return false; + } + return true; + } + + static void setPath(std::string& path, const std::string& givenPath, const std::string& rootDirectoryPath) { + if (givenPath.size() == 0) { + path = rootDirectoryPath; + } else if (!FileSystem::isLocalFilesystem(givenPath)) { + // Cloud filesystem + path = givenPath; + } else if (givenPath.size() > 0 && givenPath.at(0) == '/') { + // Full path case + path = givenPath; + } else { + // Relative path case + if (rootDirectoryPath.empty()) + throw std::logic_error("Using relative path without setting graph directory path."); + path = rootDirectoryPath + givenPath; + } + } + + static void setRootDirectoryPath(std::string& rootDirectoryPath, const std::string& givenPath) { + std::string currentWorkingDir = std::filesystem::current_path(); + if (givenPath.size() > 1 && givenPath.find_last_of("/\\") != std::string::npos) { + auto configDirectory = givenPath.substr(0, givenPath.find_last_of("/\\") + 1); + configDirectory.empty() ? 
rootDirectoryPath = currentWorkingDir + "/" : rootDirectoryPath = configDirectory; + } else { + rootDirectoryPath = currentWorkingDir + "/"; + } } - std::string appendSlash(const std::string& name) { + static std::string appendSlash(const std::string& name) { if (name.empty() || (name.back() == '/')) { return name; } @@ -153,11 +208,11 @@ class FileSystem { return (name + "/"); } - bool isAbsolutePath(const std::string& path) { + static bool isAbsolutePath(const std::string& path) { return !path.empty() && (path[0] == '/'); } - std::string joinPath(std::initializer_list segments) { + static std::string joinPath(std::initializer_list segments) { std::string joined; for (const auto& seg : segments) { diff --git a/src/gcsfilesystem.cpp b/src/gcsfilesystem.cpp index 9cca4aa4ed..5091ce3fb0 100644 --- a/src/gcsfilesystem.cpp +++ b/src/gcsfilesystem.cpp @@ -39,11 +39,9 @@ namespace ovms { namespace fs = std::filesystem; namespace gcs = google::cloud::storage; -const std::string GCSFileSystem::GCS_URL_PREFIX = "gs://"; - StatusCode GCSFileSystem::parsePath(const std::string& path, std::string* bucket, std::string* object) { - int bucket_start = path.find(GCS_URL_PREFIX) + GCS_URL_PREFIX.size(); + int bucket_start = path.find(FileSystem::GCS_URL_PREFIX) + FileSystem::GCS_URL_PREFIX.size(); int bucket_end = path.find("/", bucket_start); if (bucket_end > bucket_start) { *bucket = path.substr(bucket_start, bucket_end - bucket_start); @@ -208,7 +206,7 @@ StatusCode GCSFileSystem::getDirectorySubdirs(const std::string& path, } for (auto item = subdirs->begin(); item != subdirs->end();) { bool is_directory; - auto status = this->isDirectory(joinPath({path, *item}), &is_directory); + auto status = this->isDirectory(FileSystem::joinPath({path, *item}), &is_directory); if (status != StatusCode::OK) { SPDLOG_LOGGER_ERROR(gcs_logger, "Unable to list directory subdir content {} -> {}", path, ovms::Status(status).string()); @@ -235,7 +233,7 @@ StatusCode GCSFileSystem::getDirectoryFiles(const std::string& path, } for (auto item = files->begin(); item != files->end();) { bool is_directory; - auto status = this->isDirectory(joinPath({path, *item}), &is_directory); + auto status = this->isDirectory(FileSystem::joinPath({path, *item}), &is_directory); if (status != StatusCode::OK) { SPDLOG_LOGGER_ERROR(gcs_logger, "Unable to list directory content {} -> {}", path, ovms::Status(status).string()); @@ -357,8 +355,8 @@ StatusCode GCSFileSystem::downloadFileFolder(const std::string& path, const std: } for (auto&& d : dirs) { - std::string remote_dir_path = joinPath({path, d}); - std::string local_dir_path = joinPath({local_path, d}); + std::string remote_dir_path = FileSystem::joinPath({path, d}); + std::string local_dir_path = FileSystem::joinPath({local_path, d}); SPDLOG_LOGGER_TRACE(gcs_logger, "Processing directory {} from {} -> {}", d, remote_dir_path, local_dir_path); auto mkdir_status = CreateLocalDir(local_dir_path); @@ -378,8 +376,8 @@ StatusCode GCSFileSystem::downloadFileFolder(const std::string& path, const std: if (std::any_of(acceptedFiles.begin(), acceptedFiles.end(), [&f](const std::string& x) { return f.size() > 0 && endsWith(f, x); })) { - std::string remote_file_path = joinPath({path, f}); - std::string local_file_path = joinPath({local_path, f}); + std::string remote_file_path = FileSystem::joinPath({path, f}); + std::string local_file_path = FileSystem::joinPath({local_path, f}); SPDLOG_LOGGER_TRACE(gcs_logger, "Processing file {} from {} -> {}", f, remote_file_path, local_file_path); auto 
download_status = diff --git a/src/gcsfilesystem.hpp b/src/gcsfilesystem.hpp index 3af4c4a7fe..6d7ff66141 100644 --- a/src/gcsfilesystem.hpp +++ b/src/gcsfilesystem.hpp @@ -131,8 +131,6 @@ class GCSFileSystem : public FileSystem { */ StatusCode deleteFileFolder(const std::string& path) override; - static const std::string GCS_URL_PREFIX; - private: /** * @brief diff --git a/src/grpc_utils.cpp b/src/grpc_utils.cpp index d6ea5942ec..e6e79797d1 100644 --- a/src/grpc_utils.cpp +++ b/src/grpc_utils.cpp @@ -40,6 +40,7 @@ const grpc::Status grpc(const Status& status) { {StatusCode::MODEL_MISSING, grpc::StatusCode::NOT_FOUND}, {StatusCode::MODEL_NAME_MISSING, grpc::StatusCode::NOT_FOUND}, {StatusCode::PIPELINE_DEFINITION_NAME_MISSING, grpc::StatusCode::NOT_FOUND}, + {StatusCode::MEDIAPIPE_DEFINITION_NAME_MISSING, grpc::StatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_MISSING, grpc::StatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_NOT_LOADED_ANYMORE, grpc::StatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_NOT_LOADED_YET, grpc::StatusCode::NOT_FOUND}, @@ -70,6 +71,8 @@ const grpc::Status grpc(const Status& status) { {StatusCode::INVALID_SHAPE, grpc::StatusCode::INVALID_ARGUMENT}, {StatusCode::INVALID_BUFFER_TYPE, grpc::StatusCode::INVALID_ARGUMENT}, {StatusCode::INVALID_DEVICE_ID, grpc::StatusCode::INVALID_ARGUMENT}, + {StatusCode::INVALID_STRING_INPUT, grpc::StatusCode::INVALID_ARGUMENT}, + {StatusCode::INVALID_INPUT_FORMAT, grpc::StatusCode::INVALID_ARGUMENT}, {StatusCode::INVALID_PRECISION, grpc::StatusCode::INVALID_ARGUMENT}, {StatusCode::INVALID_VALUE_COUNT, grpc::StatusCode::INVALID_ARGUMENT}, {StatusCode::INVALID_CONTENT_SIZE, grpc::StatusCode::INVALID_ARGUMENT}, diff --git a/src/http_rest_api_handler.cpp b/src/http_rest_api_handler.cpp index b6c59c0da2..ea714ad68e 100644 --- a/src/http_rest_api_handler.cpp +++ b/src/http_rest_api_handler.cpp @@ -123,6 +123,8 @@ Status HttpRestApiHandler::parseModelVersion(std::string& model_version_str, std if (!model_version_str.empty()) { try { model_version = std::stoll(model_version_str.c_str()); + } catch (std::out_of_range const& ex) { + return StatusCode::MODEL_VERSION_MISSING; } catch (std::exception& e) { SPDLOG_DEBUG("Couldn't parse model version {}", model_version_str); return StatusCode::REST_COULD_NOT_PARSE_VERSION; @@ -240,149 +242,6 @@ void HttpRestApiHandler::parseParams(Value& scope, Document& doc) { } } -static Status convertStringToVectorOfSizes(const std::string& comma_separated_numbers, std::vector& sizes) { - std::stringstream streamData(comma_separated_numbers); - std::vector sizes_; - - std::string numberString; - while (std::getline(streamData, numberString, ',')) { - std::optional binarySize = stoi32(numberString); - if (!binarySize.has_value()) { - SPDLOG_DEBUG("Invalid argument in binary size string: {}", numberString); - return StatusCode::REST_BINARY_DATA_SIZE_PARAMETER_INVALID; - } - sizes_.push_back(binarySize.value()); - } - sizes = std::move(sizes_); - - return StatusCode::OK; -} - -static Status parseBinaryInput(KFSTensorInputProto& input, size_t binary_input_size, const char* buffer) { - if (input.datatype() == "FP32") { - for (size_t i = 0; i < binary_input_size; i += sizeof(float)) { - auto value = input.mutable_contents()->mutable_fp32_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "INT64") { - for (size_t i = 0; i < binary_input_size; i += sizeof(int64_t)) { - auto value = input.mutable_contents()->mutable_int64_contents()->Add(); - *value = 
(*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "INT32") { - for (size_t i = 0; i < binary_input_size; i += sizeof(int32_t)) { - auto value = input.mutable_contents()->mutable_int_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "INT16") { - for (size_t i = 0; i < binary_input_size; i += sizeof(int16_t)) { - auto value = input.mutable_contents()->mutable_int_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "INT8") { - for (size_t i = 0; i < binary_input_size; i += sizeof(int8_t)) { - auto value = input.mutable_contents()->mutable_int_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "UINT64") { - for (size_t i = 0; i < binary_input_size; i += sizeof(uint64_t)) { - auto value = input.mutable_contents()->mutable_uint64_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "UINT32") { - for (size_t i = 0; i < binary_input_size; i += sizeof(uint32_t)) { - auto value = input.mutable_contents()->mutable_uint_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "UINT16") { - for (size_t i = 0; i < binary_input_size; i += sizeof(uint16_t)) { - auto value = input.mutable_contents()->mutable_uint_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "UINT8") { - for (size_t i = 0; i < binary_input_size; i += sizeof(uint8_t)) { - auto value = input.mutable_contents()->mutable_uint_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "FP64") { - for (size_t i = 0; i < binary_input_size; i += sizeof(double)) { - auto value = input.mutable_contents()->mutable_fp64_contents()->Add(); - *value = (*(reinterpret_cast(buffer + i))); - } - } else if (input.datatype() == "BYTES") { - input.mutable_contents()->add_bytes_contents(buffer, binary_input_size); - } else { - return StatusCode::REST_UNSUPPORTED_PRECISION; - } - - return StatusCode::OK; -} - -#define CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE " contents is not empty. Content field should be empty when using binary inputs extension." 
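The removed `parseBinaryInput` above, together with the per-datatype emptiness checks that follow, used to decode the binary part of a REST request into typed `contents` fields. The replacement path simply slices the binary part into `raw_input_contents`, sized either from the `binary_data_size` parameter or from the tensor shape. A hedged sketch of that sizing and bounds check, using stand-in values instead of KFS protos:

```cpp
// Sketch of how the simplified REST binary-input path sizes and slices the
// binary portion of the request body; calculateBinaryDataSize/handleBinaryInput
// below follow this pattern with the actual KFS request objects.
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

static size_t elementSize(const std::string& datatype) {
    // Tiny stand-in for KFSDataTypeSize(); only a few datatypes shown.
    if (datatype == "FP32" || datatype == "INT32" || datatype == "UINT32") return 4;
    if (datatype == "FP64" || datatype == "INT64" || datatype == "UINT64") return 8;
    return 1;
}

int main() {
    std::vector<int64_t> shape{2, 3};
    std::string datatype = "FP32";
    size_t elements = std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
    size_t binarySize = elements * elementSize(datatype);  // 24 bytes for an FP32 2x3 tensor

    std::string requestBody(100, '\0');  // JSON part followed by binary part
    size_t endOfJson = 60;               // where the JSON document ends
    size_t offset = 0;                   // running offset into the binary part

    // Bounds check mirrors the REST_BINARY_BUFFER_EXCEEDED handling; the raw
    // bytes are then copied verbatim into raw_input_contents instead of being
    // parsed into typed contents fields as the removed code did.
    if (offset + binarySize > requestBody.size() - endOfJson) {
        std::cerr << "binary input exceeds buffer" << std::endl;
        return 1;
    }
    std::string rawInputContents(requestBody.data() + endOfJson + offset, binarySize);
    offset += binarySize;
    std::cout << "copied " << rawInputContents.size() << " bytes, offset now " << offset << std::endl;
    return 0;
}
```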
- -static Status validateContentFieldsEmptiness(KFSTensorInputProto& input) { - if (input.datatype() == "FP32") { - if (input.contents().fp32_contents_size() > 0) { - SPDLOG_DEBUG("FP32" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "INT64") { - if (input.contents().int64_contents_size() > 0) { - SPDLOG_DEBUG("INT64" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "INT32") { - if (input.contents().int_contents_size() > 0) { - SPDLOG_DEBUG("INT32" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "INT16") { - if (input.contents().int_contents_size() > 0) { - SPDLOG_DEBUG("INT16" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "INT8") { - if (input.contents().int_contents_size() > 0) { - SPDLOG_DEBUG("INT8" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "UINT64") { - if (input.contents().uint64_contents_size() > 0) { - SPDLOG_DEBUG("UINT64" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "UINT32") { - if (input.contents().uint_contents_size() > 0) { - SPDLOG_DEBUG("UINT32" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "UINT16") { - if (input.contents().uint_contents_size() > 0) { - SPDLOG_DEBUG("UINT16" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "UINT8") { - if (input.contents().uint_contents_size() > 0) { - SPDLOG_DEBUG("UINT8" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "FP64") { - if (input.contents().fp64_contents_size() > 0) { - SPDLOG_DEBUG("FP64" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else if (input.datatype() == "BYTES") { - if (input.contents().bytes_contents_size() > 0) { - SPDLOG_DEBUG("BYTES" CONTENT_FIELD_NOT_EMPTY_ERROR_MESSAGE); - return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; - } - } else { - return StatusCode::REST_UNSUPPORTED_PRECISION; - } - - return StatusCode::OK; -} - static bool isInputEmpty(const ::KFSRequest::InferInputTensor& input) { if (input.datatype() == "FP32") return input.contents().fp32_contents_size() == 0; @@ -409,82 +268,55 @@ static bool isInputEmpty(const ::KFSRequest::InferInputTensor& input) { return true; } -static Status handleBinaryInput(const int binary_input_size, size_t& binary_input_offset, const size_t binary_buffer_size, const char* binary_inputs, ::KFSRequest::InferInputTensor& input) { +static Status handleBinaryInput(const int binary_input_size, size_t& binary_input_offset, const size_t binary_buffer_size, const char* binary_inputs_buffer, ::KFSRequest::InferInputTensor& input, std::string* rawInputContentsBuffer) { if (binary_input_offset + binary_input_size > binary_buffer_size) { SPDLOG_DEBUG("Binary inputs size exceeds provided buffer size {}", binary_buffer_size); return StatusCode::REST_BINARY_BUFFER_EXCEEDED; } - auto status = parseBinaryInput(input, binary_input_size, binary_inputs + binary_input_offset); - if (!status.ok()) { - 
SPDLOG_DEBUG("Parsing binary inputs failed"); - return status; - } + rawInputContentsBuffer->assign(binary_inputs_buffer + binary_input_offset, binary_input_size); binary_input_offset += binary_input_size; return StatusCode::OK; } -static size_t calculateBinaryDataSize(::KFSRequest::InferInputTensor& input, size_t& binary_data_size) { +static size_t calculateBinaryDataSize(::KFSRequest::InferInputTensor& input) { auto element_size = KFSDataTypeSize(input.datatype()); size_t elements_number = std::accumulate(std::begin(input.shape()), std::end(input.shape()), 1, std::multiplies()); - binary_data_size = elements_number * element_size; + size_t binary_data_size = elements_number * element_size; return binary_data_size; } static Status handleBinaryInputs(::KFSRequest& grpc_request, const std::string& request_body, size_t endOfJson) { - const char* binary_inputs = &(request_body[endOfJson]); + const char* binary_inputs_buffer = &(request_body[endOfJson]); size_t binary_buffer_size = request_body.length() - endOfJson; size_t binary_input_offset = 0; for (int i = 0; i < grpc_request.mutable_inputs()->size(); i++) { auto input = grpc_request.mutable_inputs()->Mutable(i); auto binary_data_size_parameter = input->parameters().find("binary_data_size"); + size_t binary_input_size = 0; if (binary_data_size_parameter != input->parameters().end()) { - auto status = validateContentFieldsEmptiness(*input); - if (!status.ok()) { + if (!isInputEmpty(*input)) { SPDLOG_DEBUG("Request contains both data in json and binary inputs"); - return status; + return StatusCode::REST_CONTENTS_FIELD_NOT_EMPTY; } if (binary_data_size_parameter->second.parameter_choice_case() == inference::InferParameter::ParameterChoiceCase::kInt64Param) { - auto binary_input_size = binary_data_size_parameter->second.int64_param(); - auto status = handleBinaryInput(binary_input_size, binary_input_offset, binary_buffer_size, binary_inputs, *input); - if (!status.ok()) - return status; - } else if (binary_data_size_parameter->second.parameter_choice_case() == inference::InferParameter::ParameterChoiceCase::kStringParam) { - std::vector binary_inputs_sizes; - status = convertStringToVectorOfSizes(binary_data_size_parameter->second.string_param(), binary_inputs_sizes); - if (!status.ok()) { - return status; - } - for (auto size : binary_inputs_sizes) { - if (binary_input_offset + size > binary_buffer_size) { - SPDLOG_DEBUG("Binary inputs size exceeds provided buffer size {}", binary_buffer_size); - return StatusCode::REST_BINARY_BUFFER_EXCEEDED; - } - status = parseBinaryInput(*input, size, binary_inputs + binary_input_offset); - if (!status.ok()) { - SPDLOG_DEBUG("Parsing binary inputs failed"); - return status; - } - binary_input_offset += size; - } + binary_input_size = binary_data_size_parameter->second.int64_param(); } else { - SPDLOG_DEBUG("binary_data_size parameter type should be int64 or string"); + SPDLOG_DEBUG("binary_data_size parameter type should be int64"); return StatusCode::REST_BINARY_DATA_SIZE_PARAMETER_INVALID; } } else { if (!isInputEmpty(*input)) continue; if (grpc_request.mutable_inputs()->size() == 1 && input->datatype() == "BYTES") { - auto status = handleBinaryInput(binary_buffer_size, binary_input_offset, binary_buffer_size, binary_inputs, *input); - if (!status.ok()) - return status; - continue; + binary_input_size = binary_buffer_size; + } else { + binary_input_size = calculateBinaryDataSize(*input); } - size_t binary_data_size = calculateBinaryDataSize(*input, binary_data_size); - auto status = 
handleBinaryInput(binary_data_size, binary_input_offset, binary_buffer_size, binary_inputs, *input); - if (!status.ok()) - return status; } + auto status = handleBinaryInput(binary_input_size, binary_input_offset, binary_buffer_size, binary_inputs_buffer, *input, grpc_request.add_raw_input_contents()); + if (!status.ok()) + return status; } return StatusCode::OK; } diff --git a/src/http_server.cpp b/src/http_server.cpp index 154291c20f..9c86c76e91 100644 --- a/src/http_server.cpp +++ b/src/http_server.cpp @@ -39,7 +39,7 @@ namespace ovms { namespace net_http = tensorflow::serving::net_http; -const net_http::HTTPStatusCode http(const ovms::Status& status) { +static const net_http::HTTPStatusCode http(const ovms::Status& status) { const std::unordered_map httpStatusMap = { {StatusCode::OK, net_http::HTTPStatusCode::OK}, {StatusCode::OK_RELOADED, net_http::HTTPStatusCode::CREATED}, @@ -68,6 +68,7 @@ const net_http::HTTPStatusCode http(const ovms::Status& status) { {StatusCode::REST_PROTO_TO_STRING_ERROR, net_http::HTTPStatusCode::ERROR}, {StatusCode::REST_UNSUPPORTED_PRECISION, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::REST_SERIALIZE_TENSOR_CONTENT_INVALID_SIZE, net_http::HTTPStatusCode::ERROR}, + {StatusCode::REST_BINARY_BUFFER_EXCEEDED, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::PATH_INVALID, net_http::HTTPStatusCode::ERROR}, {StatusCode::FILE_INVALID, net_http::HTTPStatusCode::ERROR}, @@ -83,6 +84,7 @@ const net_http::HTTPStatusCode http(const ovms::Status& status) { {StatusCode::MODEL_MISSING, net_http::HTTPStatusCode::NOT_FOUND}, {StatusCode::MODEL_NAME_MISSING, net_http::HTTPStatusCode::NOT_FOUND}, {StatusCode::PIPELINE_DEFINITION_NAME_MISSING, net_http::HTTPStatusCode::NOT_FOUND}, + {StatusCode::MEDIAPIPE_DEFINITION_NAME_MISSING, net_http::HTTPStatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_MISSING, net_http::HTTPStatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_NOT_LOADED_ANYMORE, net_http::HTTPStatusCode::NOT_FOUND}, {StatusCode::MODEL_VERSION_NOT_LOADED_YET, net_http::HTTPStatusCode::NOT_FOUND}, @@ -112,6 +114,8 @@ const net_http::HTTPStatusCode http(const ovms::Status& status) { {StatusCode::INVALID_SHAPE, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::INVALID_BUFFER_TYPE, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::INVALID_DEVICE_ID, net_http::HTTPStatusCode::BAD_REQUEST}, + {StatusCode::INVALID_STRING_INPUT, net_http::HTTPStatusCode::BAD_REQUEST}, + {StatusCode::INVALID_INPUT_FORMAT, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::INVALID_PRECISION, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::INVALID_VALUE_COUNT, net_http::HTTPStatusCode::BAD_REQUEST}, {StatusCode::INVALID_CONTENT_SIZE, net_http::HTTPStatusCode::BAD_REQUEST}, diff --git a/src/inferencerequest.cpp b/src/inferencerequest.cpp index a6e333686f..4b3518d957 100644 --- a/src/inferencerequest.cpp +++ b/src/inferencerequest.cpp @@ -31,7 +31,7 @@ const std::string& InferenceRequest::getServableName() const { model_version_t InferenceRequest::getServableVersion() const { return this->servableVersion; } -Status InferenceRequest::addInput(const char* name, OVMS_DataType datatype, const size_t* shape, size_t dimCount) { +Status InferenceRequest::addInput(const char* name, OVMS_DataType datatype, const int64_t* shape, size_t dimCount) { auto [it, emplaced] = inputs.emplace(name, InferenceTensor{datatype, shape, dimCount}); return emplaced ? 
StatusCode::OK : StatusCode::DOUBLE_TENSOR_INSERT; } @@ -81,7 +81,7 @@ Status InferenceRequest::removeParameter(const char* name) { if (count) { return StatusCode::OK; } - return StatusCode::NONEXISTENT_PARAMETER_FOR_REMOVAL; + return StatusCode::NONEXISTENT_PARAMETER; } const InferenceParameter* InferenceRequest::getParameter(const char* name) const { auto it = parameters.find(name); @@ -104,10 +104,14 @@ Status InferenceRequest::getBatchSize(size_t& batchSize, size_t batchSizeIndex) batchSize = shape[batchSizeIndex]; return StatusCode::OK; } + +// Assuming the request is already validated, therefore no need to check for negative values or zeros std::map InferenceRequest::getRequestShapes() const { std::map result; for (auto& [name, tensor] : inputs) { - result.emplace(name, tensor.getShape()); + result.emplace(name, shape_t( + reinterpret_cast(tensor.getShape().data()), + reinterpret_cast(tensor.getShape().data() + tensor.getShape().size()))); } return result; } diff --git a/src/inferencerequest.hpp b/src/inferencerequest.hpp index bba5c359fb..9f010f1e40 100644 --- a/src/inferencerequest.hpp +++ b/src/inferencerequest.hpp @@ -37,7 +37,7 @@ class InferenceRequest { // this constructor can be removed with prediction tests overhaul InferenceRequest(); InferenceRequest(const char* modelName, model_version_t modelVersion); - Status addInput(const char* name, OVMS_DataType datatype, const size_t* shape, size_t dimCount); + Status addInput(const char* name, OVMS_DataType datatype, const int64_t* shape, size_t dimCount); Status getInput(const char* name, const InferenceTensor** tensor) const; uint64_t getInputsSize() const; Status removeInput(const char* name); diff --git a/src/inferenceresponse.cpp b/src/inferenceresponse.cpp index ee6db1fa9d..accd605a75 100644 --- a/src/inferenceresponse.cpp +++ b/src/inferenceresponse.cpp @@ -44,7 +44,7 @@ model_version_t InferenceResponse::getServableVersion() const { return this->servableVersion; } -Status InferenceResponse::addOutput(const std::string& name, OVMS_DataType datatype, const size_t* shape, size_t dimCount) { +Status InferenceResponse::addOutput(const std::string& name, OVMS_DataType datatype, const int64_t* shape, size_t dimCount) { auto it = std::find_if(outputs.begin(), outputs.end(), [&name](const std::pair& pair) { diff --git a/src/inferenceresponse.hpp b/src/inferenceresponse.hpp index 8d9ab4abf3..21f29e8033 100644 --- a/src/inferenceresponse.hpp +++ b/src/inferenceresponse.hpp @@ -37,7 +37,7 @@ class InferenceResponse { // this constructor can be removed with prediction tests overhaul InferenceResponse(); InferenceResponse(const std::string& servableName, model_version_t servableVersion); - Status addOutput(const std::string& name, OVMS_DataType datatype, const size_t* shape, size_t dimCount); + Status addOutput(const std::string& name, OVMS_DataType datatype, const int64_t* shape, size_t dimCount); Status getOutput(uint32_t id, const std::string** name, const InferenceTensor** tensor) const; Status getOutput(uint32_t id, const std::string** name, InferenceTensor** tensor); diff --git a/src/inferencetensor.cpp b/src/inferencetensor.cpp index 0b3e32cacd..9b46ff2b72 100644 --- a/src/inferencetensor.cpp +++ b/src/inferencetensor.cpp @@ -27,7 +27,7 @@ InferenceTensor::InferenceTensor(InferenceTensor&& rhs) : datatype(std::move(rhs.datatype)), shape(std::move(rhs.shape)), buffer(std::move(rhs.buffer)) {} -InferenceTensor::InferenceTensor(OVMS_DataType datatype, const size_t* shape, size_t dimCount) : 
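A recurring change in this patch is the C-API shape type: `addInput`, `addOutput`, and `InferenceTensor` now take `const int64_t*` (signed, KServe-style dimensions) instead of `const size_t*`, and `InferenceTensor::getShape` returns a `signed_shape_t`. The sketch below illustrates the bridge back to the unsigned internal `shape_t` used by `getRequestShapes` above; it assumes a 64-bit build and already-validated, non-negative dimensions, as the comment added in `inferencerequest.cpp` states.

```cpp
// Stand-in sketch of the signed_shape_t -> shape_t bridge.
#include <cstdint>
#include <iostream>
#include <vector>

using shape_t = std::vector<size_t>;
using signed_shape_t = std::vector<int64_t>;

int main() {
    signed_shape_t capiShape{1, 3, 224, 224};  // as accepted by addInput(..., const int64_t*, dimCount)

    // Same trick as InferenceRequest::getRequestShapes(): reinterpret the
    // contiguous int64_t storage as size_t. Only safe for non-negative dims
    // and when sizeof(size_t) == sizeof(int64_t) (64-bit builds).
    shape_t internalShape(
        reinterpret_cast<const size_t*>(capiShape.data()),
        reinterpret_cast<const size_t*>(capiShape.data() + capiShape.size()));

    for (size_t dim : internalShape)
        std::cout << dim << " ";  // 1 3 224 224
    std::cout << std::endl;
    return 0;
}
```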
+InferenceTensor::InferenceTensor(OVMS_DataType datatype, const int64_t* shape, size_t dimCount) : datatype(datatype), shape(shape, shape + dimCount) {} @@ -50,7 +50,7 @@ Status InferenceTensor::setBuffer(std::unique_ptr&& buffer) { OVMS_DataType InferenceTensor::getDataType() const { return this->datatype; } -const shape_t& InferenceTensor::getShape() const { +const signed_shape_t& InferenceTensor::getShape() const { return this->shape; } const Buffer* const InferenceTensor::getBuffer() const { diff --git a/src/inferencetensor.hpp b/src/inferencetensor.hpp index 3b20734d7e..99b89df29b 100644 --- a/src/inferencetensor.hpp +++ b/src/inferencetensor.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include "ovms.h" // NOLINT #include "shape.hpp" @@ -27,11 +28,11 @@ class Status; class InferenceTensor { const OVMS_DataType datatype; - shape_t shape; + signed_shape_t shape; std::unique_ptr buffer; public: - InferenceTensor(OVMS_DataType datatype, const size_t* shape, size_t dimCount); + InferenceTensor(OVMS_DataType datatype, const int64_t* shape, size_t dimCount); ~InferenceTensor(); InferenceTensor(InferenceTensor&&); InferenceTensor(const InferenceTensor&) = delete; @@ -41,7 +42,7 @@ class InferenceTensor { Status setBuffer(std::unique_ptr&& buffer); Status removeBuffer(); OVMS_DataType getDataType() const; - const shape_t& getShape() const; + const signed_shape_t& getShape() const; const Buffer* const getBuffer() const; }; } // namespace ovms diff --git a/src/kfs_frontend/kfs_grpc_inference_service.cpp b/src/kfs_frontend/kfs_grpc_inference_service.cpp index 72f35e1c73..3ce17ec923 100644 --- a/src/kfs_frontend/kfs_grpc_inference_service.cpp +++ b/src/kfs_frontend/kfs_grpc_inference_service.cpp @@ -17,7 +17,10 @@ #include #include +#include #include +#include +#include #include "../dags/pipeline.hpp" #include "../dags/pipelinedefinition.hpp" @@ -27,6 +30,10 @@ #include "../execution_context.hpp" #include "../grpc_utils.hpp" #include "../kfs_frontend/kfs_utils.hpp" +#if (MEDIAPIPE_DISABLE == 0) +#include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../mediapipe_internal/mediapipegraphexecutor.hpp" +#endif #include "../metric.hpp" #include "../modelinstance.hpp" #include "../modelinstanceunloadguard.hpp" @@ -108,7 +115,17 @@ Status KFSInferenceServiceImpl::getModelReady(const KFSGetModelStatusRequest* re SPDLOG_DEBUG("ModelReady requested model {} is missing, trying to find pipeline with such name", name); auto pipelineDefinition = manager.getPipelineFactory().findDefinitionByName(name); if (!pipelineDefinition) { - return Status(StatusCode::MODEL_NAME_MISSING); +#if (MEDIAPIPE_DISABLE == 0) + auto mediapipeGraphDefinition = manager.getMediapipeFactory().findDefinitionByName(name); + if (!mediapipeGraphDefinition) { + return StatusCode::MODEL_NAME_MISSING; + } + auto status = buildResponse(*mediapipeGraphDefinition, response); + // INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getModelReadyMetric(executionContext, status.ok())); TODO metrics + return status; +#else + return StatusCode::MODEL_NAME_MISSING; +#endif } auto status = buildResponse(*pipelineDefinition, response); INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getModelReadyMetric(executionContext, status.ok())); @@ -222,16 +239,27 @@ ::grpc::Status KFSInferenceServiceImpl::ModelInfer(::grpc::ServerContext* contex request->model_name(), request->model_version()); ServableMetricReporter* reporter = nullptr; - auto status = this->ModelInferImpl(context, request, response, 
ExecutionContext{ExecutionContext::Interface::GRPC, ExecutionContext::Method::ModelInfer}, reporter); - timer.stop(TOTAL); - if (!status.ok()) { - return grpc(status); - } - if (!reporter) { - return grpc(Status(StatusCode::INTERNAL_ERROR)); // should not happen + Status status; + const std::string servableName = request->model_name(); + try { + status = this->ModelInferImpl(context, request, response, ExecutionContext{ExecutionContext::Interface::GRPC, ExecutionContext::Method::ModelInfer}, reporter); + timer.stop(TOTAL); + if (!status.ok()) { + return grpc(status); + } + } catch (const std::exception& e) { + SPDLOG_ERROR("Caught exception in InferenceServiceImpl for servable: {} exception: {}", servableName, e.what()); + return grpc(Status(StatusCode::UNKNOWN_ERROR, e.what())); + } catch (...) { + SPDLOG_ERROR("Caught unknown exception in InferenceServiceImpl for servable: {}", servableName); + return grpc(Status(StatusCode::UNKNOWN_ERROR)); } double requestTotal = timer.elapsed(TOTAL); SPDLOG_DEBUG("Total gRPC request processing time: {} ms", requestTotal / 1000); + if (!reporter) { + return grpc(Status(StatusCode::OK)); + // TODO fix after Mediapipe metrics implementation + } OBSERVE_IF_ENABLED(reporter->requestTimeGrpc, requestTotal); return grpc(status); } @@ -246,6 +274,20 @@ Status KFSInferenceServiceImpl::ModelInferImpl(::grpc::ServerContext* context, c if (status == StatusCode::MODEL_NAME_MISSING) { SPDLOG_DEBUG("Requested model: {} does not exist. Searching for pipeline with that name...", request->model_name()); status = getPipeline(request, response, pipelinePtr); + if (status == StatusCode::PIPELINE_DEFINITION_NAME_MISSING) { + SPDLOG_DEBUG("Requested DAG: {} does not exist. Searching for mediapipe graph with that name...", request->model_name()); +#if (MEDIAPIPE_DISABLE == 0) + std::shared_ptr executor; + status = this->modelManager.createPipeline(executor, request->model_name(), request, response); + if (!status.ok()) { + return status; + } + status = executor->infer(request, response, executionContext, reporterOut); + return status; +#else + SPDLOG_DEBUG("Requested DAG: {} does not exist. 
Mediapipe support was disabled during build process...", request->model_name()); +#endif + } } if (!status.ok()) { if (modelInstance) { @@ -257,7 +299,7 @@ Status KFSInferenceServiceImpl::ModelInferImpl(::grpc::ServerContext* context, c if (pipelinePtr) { reporterOut = &pipelinePtr->getMetricReporter(); status = pipelinePtr->execute(executionContext); - } else { + } else if (modelInstance) { reporterOut = &modelInstance->getMetricReporter(); status = modelInstance->infer(request, response, modelInstanceUnloadGuard); } @@ -283,6 +325,15 @@ Status KFSInferenceServiceImpl::buildResponse( return StatusCode::OK; } +#if (MEDIAPIPE_DISABLE == 0) +Status KFSInferenceServiceImpl::buildResponse( + MediapipeGraphDefinition& definition, + KFSGetModelStatusResponse* response) { + response->set_ready(definition.getStatus().isAvailable()); + return StatusCode::OK; +} +#endif + static void addReadyVersions(Model& model, KFSModelMetadataResponse* response) { auto modelVersions = model.getModelVersionsMapCopy(); @@ -360,7 +411,7 @@ Status KFSInferenceServiceImpl::buildResponse( } void KFSInferenceServiceImpl::convert( - const std::pair>& from, + const std::pair>& from, KFSModelMetadataResponse::TensorMetadata* to) { to->set_name(from.first); to->set_datatype(ovmsPrecisionToKFSPrecision(from.second->getPrecision())); diff --git a/src/kfs_frontend/kfs_grpc_inference_service.hpp b/src/kfs_frontend/kfs_grpc_inference_service.hpp index c94d80d1ac..8ac1e415af 100644 --- a/src/kfs_frontend/kfs_grpc_inference_service.hpp +++ b/src/kfs_frontend/kfs_grpc_inference_service.hpp @@ -42,6 +42,7 @@ using KFSOutputTensorIteratorType = google::protobuf::internal::RepeatedPtrItera namespace ovms { class ExecutionContext; +class MediapipeGraphDefinition; class Model; class ModelInstance; class ModelInstanceUnloadGuard; @@ -73,7 +74,8 @@ class KFSInferenceServiceImpl final : public GRPCInferenceService::Service { static Status buildResponse(PipelineDefinition& pipelineDefinition, KFSModelMetadataResponse* response); static Status buildResponse(std::shared_ptr instance, KFSGetModelStatusResponse* response); static Status buildResponse(PipelineDefinition& pipelineDefinition, KFSGetModelStatusResponse* response); - static void convert(const std::pair>& from, KFSModelMetadataResponse::TensorMetadata* to); + static Status buildResponse(MediapipeGraphDefinition& pipelineDefinition, KFSGetModelStatusResponse* response); + static void convert(const std::pair>& from, KFSModelMetadataResponse::TensorMetadata* to); static Status getModelReady(const KFSGetModelStatusRequest* request, KFSGetModelStatusResponse* response, const ModelManager& manager, ExecutionContext executionContext); protected: diff --git a/src/kfs_frontend/kfs_utils.cpp b/src/kfs_frontend/kfs_utils.cpp index 29167f47ae..b3eef0f3fe 100644 --- a/src/kfs_frontend/kfs_utils.cpp +++ b/src/kfs_frontend/kfs_utils.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "kfs_utils.hpp" +#include #include #include #include @@ -24,6 +25,7 @@ #include "../logging.hpp" #include "../profiler.hpp" #include "../status.hpp" +#include "../tensorinfo.hpp" namespace ovms { Precision KFSPrecisionToOvmsPrecision(const KFSDataType& datatype) { @@ -154,4 +156,43 @@ Status isNativeFileFormatUsed(const KFSRequest& request, const std::string& name bool isNativeFileFormatUsed(const KFSTensorInputProto& proto) { return proto.datatype() == "BYTES"; } + +bool requiresPreProcessing(const KFSTensorInputProto& proto) { + return proto.datatype() == 
"BYTES"; +} + +std::string& createOrGetString(KFSTensorOutputProto& proto, int index) { + while (proto.contents().bytes_contents_size() <= index) { + proto.mutable_contents()->add_bytes_contents(); + } + return *proto.mutable_contents()->mutable_bytes_contents(index); +} +void setBatchSize(KFSTensorOutputProto& proto, int64_t batch) { + if (proto.shape_size() == 0) { + proto.add_shape(batch); + } else { + proto.set_shape(0, batch); + } +} +void setStringPrecision(KFSTensorOutputProto& proto) { + proto.set_datatype("BYTES"); +} +Status getRawInputContentsBatchSizeAndWidth(const std::string& buffer, int32_t& batchSize, size_t& width) { + size_t offset = 0; + size_t tmpBatchSize = 0; + size_t tmpMaxStringLength = 0; + while (offset + sizeof(uint32_t) <= buffer.size()) { + size_t inputSize = *(reinterpret_cast(buffer.data() + offset)); + tmpMaxStringLength = std::max(tmpMaxStringLength, inputSize); + offset += (sizeof(uint32_t) + inputSize); + tmpBatchSize++; + } + if (offset != buffer.size()) { + SPDLOG_DEBUG("Raw input contents invalid format. Every input need to be preceded by four bytes of its size."); + return StatusCode::INVALID_INPUT_FORMAT; + } + batchSize = tmpBatchSize; + width = tmpMaxStringLength + 1; + return StatusCode::OK; +} } // namespace ovms diff --git a/src/kfs_frontend/kfs_utils.hpp b/src/kfs_frontend/kfs_utils.hpp index 90464837f9..3a3cfb707c 100644 --- a/src/kfs_frontend/kfs_utils.hpp +++ b/src/kfs_frontend/kfs_utils.hpp @@ -31,4 +31,9 @@ Status prepareConsolidatedTensorImpl(KFSResponse* response, const std::string& n const std::string& getRequestServableName(const KFSRequest& request); Status isNativeFileFormatUsed(const KFSRequest& request, const std::string& name, bool& nativeFileFormatUsed); bool isNativeFileFormatUsed(const KFSTensorInputProto& proto); +bool requiresPreProcessing(const KFSTensorInputProto& proto); +std::string& createOrGetString(KFSTensorOutputProto& proto, int index); +void setBatchSize(KFSTensorOutputProto& proto, int64_t batch); +void setStringPrecision(KFSTensorOutputProto& proto); +Status getRawInputContentsBatchSizeAndWidth(const std::string& buffer, int32_t& batchSize, size_t& width); } // namespace ovms diff --git a/src/logging.cpp b/src/logging.cpp index 00aeeec8b6..861f24abaa 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -26,7 +26,9 @@ std::shared_ptr modelmanager_logger = std::make_shared dag_executor_logger = std::make_shared("dag_executor"); std::shared_ptr sequence_manager_logger = std::make_shared("sequence_manager"); std::shared_ptr capi_logger = std::make_shared("C-API"); - +#if (MEDIAPIPE_DISABLE == 0) +std::shared_ptr mediapipe_logger = std::make_shared("mediapipe"); +#endif const std::string default_pattern = "[%Y-%m-%d %T.%e][%t][%n][%l][%s:%#] %v"; static void set_log_level(const std::string log_level, std::shared_ptr logger) { @@ -58,6 +60,9 @@ static void register_loggers(const std::string& log_level, std::vectorset_pattern(default_pattern); sequence_manager_logger->set_pattern(default_pattern); capi_logger->set_pattern(default_pattern); +#if (MEDIAPIPE_DISABLE == 0) + mediapipe_logger->set_pattern(default_pattern); +#endif for (auto& sink : sinks) { gcs_logger->sinks().push_back(sink); azurestorage_logger->sinks().push_back(sink); @@ -66,6 +71,9 @@ static void register_loggers(const std::string& log_level, std::vectorsinks().push_back(sink); sequence_manager_logger->sinks().push_back(sink); capi_logger->sinks().push_back(sink); +#if (MEDIAPIPE_DISABLE == 0) + mediapipe_logger->sinks().push_back(sink); +#endif } 
set_log_level(log_level, serving_logger); set_log_level(log_level, gcs_logger); @@ -75,6 +83,9 @@ static void register_loggers(const std::string& log_level, std::vector modelmanager_logger; extern std::shared_ptr dag_executor_logger; extern std::shared_ptr sequence_manager_logger; extern std::shared_ptr capi_logger; - +#if (MEDIAPIPE_DISABLE == 0) +extern std::shared_ptr mediapipe_logger; +#endif void configure_logger(const std::string log_level, const std::string log_path); } // namespace ovms diff --git a/src/main_benchmark.cpp b/src/main_benchmark.cpp index 456aca42cb..45dde58ab9 100644 --- a/src/main_benchmark.cpp +++ b/src/main_benchmark.cpp @@ -33,6 +33,7 @@ namespace { +using signed_shape_t = std::vector; class BenchmarkCLIParser { std::unique_ptr options; @@ -89,7 +90,7 @@ void BenchmarkCLIParser::parse(int argc, char** argv) { "MODEL_NAME") ("servable_version", "workload threads per ireq", - cxxopts::value()->default_value("0"), + cxxopts::value()->default_value("0"), "MODEL_VERSION") ("inputs_names", "Comma separated list of inputs names", @@ -112,7 +113,7 @@ void BenchmarkCLIParser::parse(int argc, char** argv) { } } -std::vector parseShapes(const std::string& cliInputShapes) { +signed_shape_t parseShapes(const std::string& cliInputShapes) { auto inputShapes = ovms::tokenize(cliInputShapes, ';'); if (inputShapes.size() != 1) { std::cout << __LINE__ << std::endl; @@ -129,11 +130,11 @@ std::vector parseShapes(const std::string& cliInputShapes) { } std::string shapeString = firstShape.substr(leftBracket + 1, rightBracket - leftBracket - 1); auto dimsString = ovms::tokenize(shapeString, ','); - std::vector shape; + signed_shape_t shape; std::transform(dimsString.begin(), dimsString.end(), std::back_inserter(shape), - [](const std::string& s) -> std::size_t { + [](const std::string& s) -> signed_shape_t::value_type { auto dimOpt = ovms::stoi64(s); - if (!dimOpt.has_value()) { + if (!dimOpt.has_value() || dimOpt.value() <= 0) { std::cout << __LINE__ << " " << s << std::endl; throw std::invalid_argument("Invalid shape argument"); } @@ -175,13 +176,11 @@ static void installSignalHandlers() { sigaction(SIGILL, &sigIllHandler, NULL); } -using shape_t = std::vector; - -OVMS_InferenceRequest* prepareRequest(OVMS_Server* server, const std::string& servableName, uint32_t servableVersion, OVMS_DataType datatype, const shape_t& shape, const std::string& inputName, const void* data) { +OVMS_InferenceRequest* prepareRequest(OVMS_Server* server, const std::string& servableName, int64_t servableVersion, OVMS_DataType datatype, const signed_shape_t& shape, const std::string& inputName, const void* data) { OVMS_InferenceRequest* request{nullptr}; OVMS_InferenceRequestNew(&request, server, servableName.c_str(), servableVersion); OVMS_InferenceRequestAddInput(request, inputName.c_str(), datatype, shape.data(), shape.size()); - size_t elementsCount = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + auto elementsCount = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); OVMS_InferenceRequestInputSetData(request, inputName.c_str(), data, sizeof(float) * elementsCount, OVMS_BUFFERTYPE_CPU, 0); return request; } @@ -279,7 +278,11 @@ int main(int argc, char** argv) { // model parameters /////////////////////// std::string servableName(cliparser.result->operator[]("servable_name").as()); - uint64_t servableVersion(cliparser.result->operator[]("servable_version").as()); + int64_t servableVersion(cliparser.result->operator[]("servable_version").as()); + if (servableVersion < 
0) { + std::cerr << "servableVersion cannot be negative" << std::endl; + return EX_USAGE; + } // input names handling std::string cliInputsNames(cliparser.result->operator[]("inputs_names").as()); auto inputsNames = ovms::tokenize(cliInputsNames, ','); @@ -291,7 +294,7 @@ int main(int argc, char** argv) { // datatype handling OVMS_DataType datatype = OVMS_DATATYPE_FP32; // shape handling - std::vector shape = parseShapes(cliparser.result->operator[]("shape").as()); + signed_shape_t shape = parseShapes(cliparser.result->operator[]("shape").as()); /////////////////////// // benchmark parameters /////////////////////// @@ -301,7 +304,7 @@ int main(int argc, char** argv) { size_t threadCount = nireq * threadsPerIreq; size_t niterPerThread = niter / threadCount; - size_t elementsCount = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + auto elementsCount = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); std::vector data(elementsCount, 0.1); /////////////////////// @@ -337,19 +340,6 @@ int main(int argc, char** argv) { } OVMS_InferenceResponseDelete(response); - /////////////////////// - // prepare response data - /////////////////////// - uint32_t outputCount = 0; - const void* voutputData; - size_t bytesize = 0; - uint32_t outputId = outputCount - 1; - OVMS_DataType responseDatatype = (OVMS_DataType) 42; - const uint64_t* outputShape{nullptr}; - uint32_t dimCount = 0; - OVMS_BufferType bufferType = (OVMS_BufferType)42; - uint32_t deviceId = 42; - const char* outputName{nullptr}; /////////////////////// // setup workload machinery /////////////////////// diff --git a/src/main_capi.c b/src/main_capi.c index 21910e6646..95c3f3589a 100644 --- a/src/main_capi.c +++ b/src/main_capi.c @@ -20,6 +20,10 @@ #include "ovms.h" int main() { + uint32_t major = 0, minor = 0; + OVMS_ApiVersion(&major, &minor); + printf("C-API Version: %d.%d\n", major, minor); + OVMS_ServerSettings* serverSettings = 0; OVMS_ModelsSettings* modelsSettings = 0; OVMS_Server* srv; @@ -55,9 +59,9 @@ int main() { printf("Server ready for inference\n"); - const uint64_t SHAPE_N = 30; - const uint64_t SHAPE_C = 20; - const uint64_t SHAPE[2] = {SHAPE_N, SHAPE_C}; + const int64_t SHAPE_N = 30; + const int64_t SHAPE_C = 20; + const int64_t SHAPE[2] = {SHAPE_N, SHAPE_C}; const size_t NUM_ELEMENTS = SHAPE_N * SHAPE_C; const size_t DATA_SIZE = NUM_ELEMENTS * sizeof(float); const float INPUT_ELEMENT_VALUE = 3.2f; @@ -95,8 +99,8 @@ int main() { const char* oName = NULL; // not needed OVMS_DataType oType; // not needed - const uint64_t* oShape; // not needed - uint32_t oDims; // not needed + const int64_t* oShape; // not needed + size_t oDims; // not needed const void* oData = NULL; size_t oNumBytes = 0; OVMS_BufferType oBuffType; // not needed diff --git a/src/main_capi.cpp b/src/main_capi.cpp index fb65880059..2e4d6abeea 100644 --- a/src/main_capi.cpp +++ b/src/main_capi.cpp @@ -29,10 +29,10 @@ #include "ovms.h" // NOLINT const char* MODEL_NAME = "dummy"; -const uint64_t MODEL_VERSION = 1; +const int64_t MODEL_VERSION = 1; const char* INPUT_NAME = "b"; constexpr size_t DIM_COUNT = 2; -constexpr size_t SHAPE[DIM_COUNT] = {1, 10}; +constexpr int64_t SHAPE[DIM_COUNT] = {1, 10}; namespace { volatile sig_atomic_t shutdown_request = 0; @@ -73,6 +73,10 @@ static void installSignalHandlers() { int main(int argc, char** argv) { installSignalHandlers(); + uint32_t major = 0, minor = 0; + OVMS_ApiVersion(&major, &minor); + std::cout << "C-API Version: " << major << "." 
<< minor << std::endl; + OVMS_ServerSettings* serverSettings = 0; OVMS_ModelsSettings* modelsSettings = 0; OVMS_Server* srv; @@ -124,6 +128,12 @@ int main(int argc, char** argv) { OVMS_StatusGetDetails(res, &details); std::cout << "Error occured during inference. Code:" << code << ", details:" << details << std::endl; + OVMS_StatusDelete(res); + OVMS_InferenceRequestDelete(request); + OVMS_ServerDelete(srv); + OVMS_ModelsSettingsDelete(modelsSettings); + OVMS_ServerSettingsDelete(serverSettings); + return 1; } // read output uint32_t outputCount = 0; @@ -132,8 +142,8 @@ int main(int argc, char** argv) { size_t bytesize = 0; uint32_t outputId = outputCount - 1; OVMS_DataType datatype = (OVMS_DataType)42; - const uint64_t* shape{nullptr}; - uint32_t dimCount = 0; + const int64_t* shape{nullptr}; + size_t dimCount = 0; OVMS_BufferType bufferType = (OVMS_BufferType)42; uint32_t deviceId = 42; const char* outputName{nullptr}; diff --git a/src/mediapipe_calculators/calculators.md b/src/mediapipe_calculators/calculators.md new file mode 100644 index 0000000000..3f18e2e741 --- /dev/null +++ b/src/mediapipe_calculators/calculators.md @@ -0,0 +1,20 @@ +# Mediapipe calculators {#ovms_docs_mediapipe_calculators} + +## Introduction + +Each calculator is a node of a graph, and the bulk of graph execution happens inside its calculators. OpenVINO Model Server (OVMS) provides its own calculators but can also use newly developed calculators or reuse the existing calculators defined in the original MediaPipe repository. + +For more details about the calculator concept, see the MediaPipe [Calculators Concept Page](https://developers.google.com/mediapipe/framework/framework_concepts/calculators). + +### OVMS ADAPTER + +The OVMS Adapter is an implementation of the [Intel Model API](https://github.com/openvinotoolkit/model_api) Adapter [interface](https://github.com/openvinotoolkit/model_api/blob/master/model_api/cpp/adapters/include/adapters/inference_adapter.h) that executes inference with the OVMS [C-API](https://github.com/openvinotoolkit/model_server/blob/develop/docs/model_server_c_api.md). + +### OVMS SESSION CALCULATOR + +This [calculator](https://github.com/openvinotoolkit/model_server/blob/develop/src/mediapipe_calculators/modelapiovmssessioncalculator.cc) creates the OVMS Adapter and declares which model/[DAG](https://github.com/openvinotoolkit/model_server/blob/develop/docs/dag_scheduler.md) should be used for inference. It has a mandatory field `servable_name` and an optional `servable_version`; when `servable_version` is missing, OVMS uses the default version of the targeted servable. + +### OVMS INFERENCE CALCULATOR + +This [calculator](https://github.com/openvinotoolkit/model_server/blob/develop/src/mediapipe_calculators/modelapiovmsinferencecalculator.cc) uses the OVMS Adapter received as an `input_side_packet` to execute inference with OVMS. Its optional fields `tag_to_input_tensor_names` and `tag_to_output_tensor_names` map MediaPipe packet names to servable (Model/DAG) inputs and/or outputs. It accepts `ov::Tensor` as the input and output packet type.
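The snippet below is a minimal, illustrative sketch (not part of this change) of driving the adapter added in `modelapiovmsadapter.hpp` directly from C++, which is essentially what the inference calculator does with the adapter it receives through its `SESSION` side packet. The servable name `dummy`, the input name `b`, and the assumption that `cserver` points to an already started `OVMS_Server` with that servable loaded are placeholders for illustration.

    #include <algorithm>
    #include <map>
    #include <string>
    #include <openvino/openvino.hpp>
    #include "modelapiovmsadapter.hpp"

    // Hypothetical usage; "dummy"/"b" and the started server are assumptions.
    mediapipe::ovms::InferenceOutput runOnce(OVMS_Server* cserver) {
        // 0 means: let OVMS pick the default version of the servable.
        mediapipe::ovms::OVMSInferenceAdapter adapter("dummy", 0, cserver);
        ov::Tensor input(ov::element::f32, ov::Shape{1, 10});
        std::fill_n(input.data<float>(), input.get_size(), 0.1f);
        mediapipe::ovms::InferenceInput in{{"b", input}};
        // Returns a map of output name -> ov::Tensor holding a copy of the response data.
        return adapter.infer(in);
    }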
+ diff --git a/src/mediapipe_calculators/modelapiovmsadapter.cc b/src/mediapipe_calculators/modelapiovmsadapter.cc new file mode 100644 index 0000000000..4826bbd79e --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmsadapter.cc @@ -0,0 +1,304 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "modelapiovmsadapter.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../stringutils.hpp" // TODO dispose +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "src/mediapipe_calculators/ovmscalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib +namespace mediapipe { +#define MLOG(A) LOG(ERROR) << __FILE__ << ":" << __LINE__ << " " << A << std::endl; + +using std::endl; + +#define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + uint32_t code = 0; \ + const char* msg = nullptr; \ + OVMS_StatusGetCode(err, &code); \ + OVMS_StatusGetDetails(err, &msg); \ + LOG(ERROR) << "Error encountred in OVMSCalculator:" << msg << " code: " << code; \ + std::runtime_error exc(msg); \ + OVMS_StatusDelete(err); \ + throw exc; \ + } \ + } +#define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \ + std::unique_ptr GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete)); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +using InferenceOutput = std::map; +using InferenceInput = std::map; +// TODO +// * why std::map +// * no ret code from infer() +// * no ret code from load() +namespace ovms { +static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype); +static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype); +static ov::Tensor makeOvTensorO(OVMS_DataType datatype, const int64_t* shape, size_t dimCount, const void* voutputData, size_t bytesize); + +OVMSInferenceAdapter::OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion, OVMS_Server* cserver) : + servableName(servableName), + servableVersion(servableVersion) { + if (nullptr != cserver) { + this->cserver = cserver; + } else { + OVMS_ServerNew(&this->cserver); + } +} + +OVMSInferenceAdapter::~OVMSInferenceAdapter() { + LOG(ERROR) << "OVMSAdapter destr"; +} + +InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) { + ///////////////////// + // PREPARE REQUEST + ///////////////////// + OVMS_InferenceRequest* request{nullptr}; + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, servableName.c_str(), servableVersion)); + CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request); + + InferenceOutput output; + OVMS_Status* status{nullptr}; + // 
PREPARE EACH INPUT + // extract single tensor + for (const auto& [name, input_tensor] : input) { + // TODO validate existence of tag key in map + // or handle inference when there is no need for mapping + const char* realInputName = name.c_str(); +#if 0 + const float* input_tensor_access = reinterpret_cast(input_tensor.data()); + std::stringstream ss; + ss << " Adapter received tensor: [ "; + for (int x = 0; x < 10; ++x) { + ss << input_tensor_access[x] << " "; + } + ss << " ]"; + MLOG(ss.str()); +#endif + const auto& ovinputShape = input_tensor.get_shape(); + std::vector inputShape{ovinputShape.begin(), ovinputShape.end()}; // TODO error handling shape conversion + OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type()); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size())); // TODO retcode + const uint32_t NOT_USED_NUM = 0; + // TODO handle hardcoded buffertype, notUsedNum additional options? side packets? + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, + realInputName, + reinterpret_cast(input_tensor.data()), + input_tensor.get_byte_size(), + OVMS_BUFFERTYPE_CPU, + NOT_USED_NUM)); // TODO retcode + } + ////////////////// + // INFERENCE + ////////////////// + OVMS_InferenceResponse* response = nullptr; + status = OVMS_Inference(cserver, request, &response); + if (nullptr != status) { + uint32_t code = 0; + const char* msg = nullptr; + OVMS_StatusGetCode(status, &code); + OVMS_StatusGetDetails(status, &msg); + std::stringstream ss; + ss << "Inference in OVMSAdapter failed: "; + ss << msg << " code: " << code; + MLOG(ss.str()); + OVMS_StatusDelete(status); + return output; + } + CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response); + // verify GetOutputCount + uint32_t outputCount = 42; + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutputCount(response, &outputCount)); + uint32_t parameterCount = 42; + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetParameterCount(response, ¶meterCount)); + // TODO handle output filtering. 
Graph definition could suggest + // that we are not interested in all outputs from OVMS Inference + const void* voutputData; + size_t bytesize = 42; + OVMS_DataType datatype = (OVMS_DataType)199; + const int64_t* shape{nullptr}; + size_t dimCount = 42; + OVMS_BufferType bufferType = (OVMS_BufferType)199; + uint32_t deviceId = 42; + const char* outputName{nullptr}; + for (size_t i = 0; i < outputCount; ++i) { + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId)); + output[outputName] = makeOvTensorO(datatype, shape, dimCount, voutputData, bytesize); // TODO optimize FIXME + } + return output; +} +void OVMSInferenceAdapter::loadModel(const std::shared_ptr& model, ov::Core& core, + const std::string& device, const ov::AnyMap& compilationConfig) { + // no need to load but we need to extract metadata + OVMS_ServableMetadata* servableMetadata = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_GetServableMetadata(cserver, servableName.c_str(), servableVersion, &servableMetadata)); + uint32_t inputCount = 0; + uint32_t outputCount = 0; + // TODO ensure Metadata object removal in all paths + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInputCount(servableMetadata, &inputCount)); + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetOutputCount(servableMetadata, &outputCount)); + + uint32_t id = 0; + OVMS_DataType datatype = (OVMS_DataType)199; + int64_t* shapeMin{nullptr}; + int64_t* shapeMax{nullptr}; + size_t dimCount = 42; + const char* tensorName{nullptr}; + for (id = 0; id < inputCount; ++id) { + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax)); + inputNames.emplace_back(tensorName); + shape_min_max_t inputMinMax; + for (size_t i = 0; i < dimCount; ++i) { + // TODO test adapter dynamic shapes + inputMinMax.first.emplace_back(shapeMin[i]); + inputMinMax.second.emplace_back(shapeMax[i]); + } + this->inShapesMinMaxes.insert({tensorName, std::move(inputMinMax)}); + } + for (id = 0; id < outputCount; ++id) { + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax)); + outputNames.emplace_back(tensorName); + } + const ov::AnyMap* servableMetadataRtInfo; + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInfo(servableMetadata, reinterpret_cast(&servableMetadataRtInfo))); + this->modelConfig = *servableMetadataRtInfo; + OVMS_ServableMetadataDelete(servableMetadata); +} + +ov::Shape OVMSInferenceAdapter::getInputShape(const std::string& inputName) const { + auto it = inShapesMinMaxes.find(inputName); + if (it == inShapesMinMaxes.end()) { + throw std::runtime_error(std::string("Adapter could not find input:") + inputName); // TODO error handling + } + + ov::Shape ovShape; + const auto& [minBorder, maxBorder] = it->second; + ovShape.reserve(minBorder.size()); + for (const auto& d : minBorder) { + ovShape.emplace_back(d); + } + // TODO support partial shapes (dynamic) + ov::Shape ovShape2; + for (const auto& d : maxBorder) { + ovShape2.emplace_back(d); + } + return ovShape; +} + +std::vector OVMSInferenceAdapter::getInputNames() const { return inputNames; } + +std::vector OVMSInferenceAdapter::getOutputNames() const { return outputNames; } + +const ov::AnyMap& OVMSInferenceAdapter::getModelConfig() const { + return modelConfig; +} + +static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype) { + static std::unordered_map precisionMap{ + 
{ov::element::Type_t::f64, OVMS_DATATYPE_FP64}, + {ov::element::Type_t::f32, OVMS_DATATYPE_FP32}, + {ov::element::Type_t::f16, OVMS_DATATYPE_FP16}, + {ov::element::Type_t::i64, OVMS_DATATYPE_I64}, + {ov::element::Type_t::i32, OVMS_DATATYPE_I32}, + {ov::element::Type_t::i16, OVMS_DATATYPE_I16}, + {ov::element::Type_t::i8, OVMS_DATATYPE_I8}, + {ov::element::Type_t::i4, OVMS_DATATYPE_I4}, + {ov::element::Type_t::u64, OVMS_DATATYPE_U64}, + {ov::element::Type_t::u32, OVMS_DATATYPE_U32}, + {ov::element::Type_t::u16, OVMS_DATATYPE_U16}, + {ov::element::Type_t::u8, OVMS_DATATYPE_U8}, + {ov::element::Type_t::u4, OVMS_DATATYPE_U4}, + {ov::element::Type_t::u1, OVMS_DATATYPE_U1}, + {ov::element::Type_t::boolean, OVMS_DATATYPE_BOOL}, + {ov::element::Type_t::bf16, OVMS_DATATYPE_BF16}, + {ov::element::Type_t::undefined, OVMS_DATATYPE_UNDEFINED}, + {ov::element::Type_t::dynamic, OVMS_DATATYPE_DYNAMIC} + // {ov::element::Type_t::, OVMS_DATATYPE_MIXEDMIXED}, + // {ov::element::Type_t::, OVMS_DATATYPE_Q78Q78}, + // {ov::element::Type_t::, OVMS_DATATYPE_BINBIN}, + // {ov::element::Type_t::, OVMS_DATATYPE_CUSTOMCUSTOM + }; + auto it = precisionMap.find(datatype); + if (it == precisionMap.end()) { + return OVMS_DATATYPE_UNDEFINED; + } + return it->second; +} + +static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype) { + static std::unordered_map precisionMap{ + {OVMS_DATATYPE_FP64, ov::element::Type_t::f64}, + {OVMS_DATATYPE_FP32, ov::element::Type_t::f32}, + {OVMS_DATATYPE_FP16, ov::element::Type_t::f16}, + {OVMS_DATATYPE_I64, ov::element::Type_t::i64}, + {OVMS_DATATYPE_I32, ov::element::Type_t::i32}, + {OVMS_DATATYPE_I16, ov::element::Type_t::i16}, + {OVMS_DATATYPE_I8, ov::element::Type_t::i8}, + {OVMS_DATATYPE_I4, ov::element::Type_t::i4}, + {OVMS_DATATYPE_U64, ov::element::Type_t::u64}, + {OVMS_DATATYPE_U32, ov::element::Type_t::u32}, + {OVMS_DATATYPE_U16, ov::element::Type_t::u16}, + {OVMS_DATATYPE_U8, ov::element::Type_t::u8}, + {OVMS_DATATYPE_U4, ov::element::Type_t::u4}, + {OVMS_DATATYPE_U1, ov::element::Type_t::u1}, + {OVMS_DATATYPE_BOOL, ov::element::Type_t::boolean}, + {OVMS_DATATYPE_BF16, ov::element::Type_t::bf16}, + {OVMS_DATATYPE_UNDEFINED, ov::element::Type_t::undefined}, + {OVMS_DATATYPE_DYNAMIC, ov::element::Type_t::dynamic} + // {OVMS_DATATYPE_MIXED, ov::element::Type_t::MIXED}, + // {OVMS_DATATYPE_Q78, ov::element::Type_t::Q78}, + // {OVMS_DATATYPE_BIN, ov::element::Type_t::BIN}, + // {OVMS_DATATYPE_CUSTOM, ov::element::Type_t::CUSTOM + }; + auto it = precisionMap.find(datatype); + if (it == precisionMap.end()) { + return ov::element::Type_t::undefined; + } + return it->second; +} + +static ov::Tensor makeOvTensorO(OVMS_DataType datatype, const int64_t* shape, size_t dimCount, const void* voutputData, size_t bytesize) { + ov::Shape ovShape; + for (size_t i = 0; i < dimCount; ++i) { + ovShape.push_back(shape[i]); + } + // here we make copy of underlying OVMS repsonse tensor + ov::Tensor output(CAPI2OVPrecision(datatype), ovShape); + std::memcpy(output.data(), voutputData, bytesize); + return output; +} + +#pragma GCC diagnostic pop +} // namespace ovms +} // namespace mediapipe diff --git a/src/mediapipe_calculators/modelapiovmsadapter.hpp b/src/mediapipe_calculators/modelapiovmsadapter.hpp new file mode 100644 index 0000000000..89762c5ba1 --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmsadapter.hpp @@ -0,0 +1,68 @@ +#pragma once +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include +#include +#include +#include +#include + +#include // TODO fix path model_api/model_api/cpp/adapters/include/adapters/inference_adapter.h +#include + +#include "../ovms.h" // NOLINT +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "src/mediapipe_calculators/ovmscalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib + +namespace mediapipe { +namespace ovms { + +using InferenceOutput = std::map; +using InferenceInput = std::map; + +// TODO +// * why std::map +using shape_border_t = std::vector; +using shape_min_max_t = std::pair; +using shapes_min_max_t = std::unordered_map; +class OVMSInferenceAdapter : public ::InferenceAdapter { + OVMS_Server* cserver{nullptr}; + const std::string servableName; + uint32_t servableVersion; + std::vector inputNames; + std::vector outputNames; + shapes_min_max_t inShapesMinMaxes; + ov::AnyMap modelConfig; + +public: + OVMSInferenceAdapter(const std::string& servableName, uint32_t servableVersion = 0, OVMS_Server* server = nullptr); + virtual ~OVMSInferenceAdapter(); + InferenceOutput infer(const InferenceInput& input) override; + void loadModel(const std::shared_ptr& model, ov::Core& core, + const std::string& device, const ov::AnyMap& compilationConfig) override; + ov::Shape getInputShape(const std::string& inputName) const override; + std::vector getInputNames() const override; + std::vector getOutputNames() const override; + const ov::AnyMap& getModelConfig() const override; +}; +} // namespace ovms +} // namespace mediapipe diff --git a/src/mediapipe_calculators/modelapiovmsadapterwrapper.cc b/src/mediapipe_calculators/modelapiovmsadapterwrapper.cc new file mode 100644 index 0000000000..8e8b02fa23 --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmsadapterwrapper.cc @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
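For readability, this is roughly what the `ASSERT_CAPI_STATUS_NULL` macro used in `modelapiovmsadapter.cc` boils down to, written here as a plain helper (the helper name is illustrative; the actual macro additionally logs the failure with `LOG(ERROR)` before throwing):

    #include <stdexcept>
    #include <string>
    #include "ovms.h"

    // A non-null OVMS_Status* means the C-API call failed: extract code and details,
    // free the status object, and surface the failure as an exception.
    static void throwIfError(OVMS_Status* status) {
        if (status == nullptr)
            return;
        uint32_t code = 0;
        const char* details = nullptr;
        OVMS_StatusGetCode(status, &code);
        OVMS_StatusGetDetails(status, &details);
        std::string message = std::string(details ? details : "unknown error") + " code: " + std::to_string(code);
        OVMS_StatusDelete(status);
        throw std::runtime_error(message);
    }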
+//***************************************************************************** +#include "modelapiovmsadapterwrapper.hpp" + +#include + +#include // TODO fix path model_api/model_api/cpp/adapters/include/adapters/inference_adapter.h + +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "src/mediapipe_calculators/ovmscalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib +namespace mediapipe { +namespace ovms { + +AdapterWrapper::AdapterWrapper(::InferenceAdapter* adapter) : + adapter(adapter) { + LOG(ERROR) << "Wrapper constr"; +} +AdapterWrapper::~AdapterWrapper() { + LOG(ERROR) << "Wrapper destr"; +} +} // namespace ovms +} // namespace mediapipe diff --git a/src/systeminfo_impl.hpp b/src/mediapipe_calculators/modelapiovmsadapterwrapper.hpp similarity index 77% rename from src/systeminfo_impl.hpp rename to src/mediapipe_calculators/modelapiovmsadapterwrapper.hpp index 5d172033c1..55bf59ea33 100644 --- a/src/systeminfo_impl.hpp +++ b/src/mediapipe_calculators/modelapiovmsadapterwrapper.hpp @@ -1,3 +1,4 @@ +#pragma once //***************************************************************************** // Copyright 2023 Intel Corporation // @@ -13,14 +14,17 @@ // See the License for the specific language governing permissions and // limitations under the License. //***************************************************************************** -#pragma once -#include -#include +#include -#include +class InferenceAdapter; +namespace mediapipe { namespace ovms { -class Status; -Status getCoreCountImpl(const std::string& cpusets, uint16_t& coreCount); -Status getCPUSetFile(std::ifstream& ifs, const std::string& filename); + +struct AdapterWrapper { + std::unique_ptr<::InferenceAdapter> adapter; + AdapterWrapper(::InferenceAdapter* adapter); + ~AdapterWrapper(); +}; } // namespace ovms +} // namespace mediapipe diff --git a/src/mediapipe_calculators/modelapiovmsinferencecalculator.cc b/src/mediapipe_calculators/modelapiovmsinferencecalculator.cc new file mode 100644 index 0000000000..9ad2c41ff8 --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmsinferencecalculator.cc @@ -0,0 +1,174 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
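`AdapterWrapper` exists so that a MediaPipe packet can own an `::InferenceAdapter` behind a forward declaration only. A rough sketch of how the session calculator hands the adapter to the inference calculator via the `SESSION` side packet follows; the template argument of the packet accessor is abbreviated in the diff above, so `Get<AdapterWrapper>()` and the servable name `dummy` are assumptions for illustration.

    #include <memory>
    #include "mediapipe/framework/calculator_framework.h"
    #include "modelapiovmsadapter.hpp"
    #include "modelapiovmsadapterwrapper.hpp"

    // Producer side, as in ModelAPISessionCalculator::Open(): wrap the adapter and
    // publish it under the SESSION tag.
    static absl::Status publishSession(mediapipe::CalculatorContext* cc) {
        auto session = std::make_unique<mediapipe::ovms::AdapterWrapper>(
            new mediapipe::ovms::OVMSInferenceAdapter("dummy", 0));
        cc->OutputSidePackets().Tag("SESSION").Set(mediapipe::Adopt(session.release()));
        return absl::OkStatus();
    }

    // Consumer side, as in ModelAPISideFeedCalculator::Open(): borrow the adapter
    // from the side packet; the wrapper keeps ownership.
    static ::InferenceAdapter* borrowSession(mediapipe::CalculatorContext* cc) {
        return cc->InputSidePackets().Tag("SESSION").Get<mediapipe::ovms::AdapterWrapper>().adapter.get();
    }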
+//***************************************************************************** +#include +#include +#include + +#include // TODO fix path model_api/model_api/cpp/adapters/include/adapters/inference_adapter.h +#include + +#include "../ovms.h" // NOLINT +#include "../stringutils.hpp" // TODO dispose +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "modelapiovmsadapterwrapper.hpp" +#include "src/mediapipe_calculators/modelapiovmsinferencecalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib +namespace mediapipe { +#define MLOG(A) LOG(ERROR) << __FILE__ << ":" << __LINE__ << " " << A << std::endl; + +using ovms::AdapterWrapper; +using std::endl; + +namespace { +#define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + uint32_t code = 0; \ + const char* msg = nullptr; \ + OVMS_StatusGetCode(err, &code); \ + OVMS_StatusGetDetails(err, &msg); \ + LOG(ERROR) << "Error encountred in OVMSCalculator:" << msg << " code: " << code; \ + OVMS_StatusDelete(err); \ + RET_CHECK(err == nullptr); \ + } \ + } +#define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \ + std::unique_ptr GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete)); + +} // namespace + +const std::string SESSION_TAG{"SESSION"}; + +class ModelAPISideFeedCalculator : public CalculatorBase { + ::InferenceAdapter* session{nullptr}; + std::unordered_map outputNameToTag; // TODO move to Open(); + +public: + static absl::Status GetContract(CalculatorContract* cc) { + MLOG("Main GetContract start"); + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + for (const std::string& tag : cc->Inputs().GetTags()) { + cc->Inputs().Tag(tag).Set(); + } + for (const std::string& tag : cc->Outputs().GetTags()) { + cc->Outputs().Tag(tag).Set(); + } + cc->InputSidePackets().Tag(SESSION_TAG.c_str()).Set(); + MLOG("Main GetContract end"); + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) final { + MLOG("Main Close"); + return absl::OkStatus(); + } + absl::Status Open(CalculatorContext* cc) final { + MLOG("Main Open start"); + session = cc->InputSidePackets() + .Tag(SESSION_TAG.c_str()) + .Get() + .adapter.get(); + for (CollectionItemId id = cc->Inputs().BeginId(); + id < cc->Inputs().EndId(); ++id) { + if (!cc->Inputs().Get(id).Header().IsEmpty()) { + cc->Outputs().Get(id).SetHeader(cc->Inputs().Get(id).Header()); + } + } + if (cc->OutputSidePackets().NumEntries() != 0) { + for (CollectionItemId id = cc->InputSidePackets().BeginId(); + id < cc->InputSidePackets().EndId(); ++id) { + cc->OutputSidePackets().Get(id).Set(cc->InputSidePackets().Get(id)); + } + } + const auto& options = cc->Options(); + for (const auto& [key, value] : options.tag_to_output_tensor_names()) { + outputNameToTag[value] = key; + } + cc->SetOffset(TimestampDiff(0)); + + MLOG("Main Open end"); + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) final { + MLOG("Main process start"); + if (cc->Inputs().NumEntries() == 0) { + return tool::StatusStop(); + } + ///////////////////// + // PREPARE INPUT MAP + ///////////////////// + + const auto& options = cc->Options(); + const auto& inputTagInputMap = options.tag_to_input_tensor_names(); + ::InferenceInput input; + ::InferenceOutput output; + for (const std::string& tag : cc->Inputs().GetTags()) { + const 
char* realInputName{nullptr}; + auto it = inputTagInputMap.find(tag); + if (it == inputTagInputMap.end()) { + realInputName = tag.c_str(); + } else { + realInputName = it->second.c_str(); + } + auto& packet = cc->Inputs().Tag(tag).Get(); + input[realInputName] = packet; +#if 0 + ov::Tensor input_tensor(packet); + const float* input_tensor_access = reinterpret_cast(input_tensor.data()); + std::stringstream ss; + ss << "ModelAPICalculator received tensor: [ "; + for (int x = 0; x < 10; ++x) { + ss << input_tensor_access[x] << " "; + } + ss << " ] timestamp: " << cc->InputTimestamp().DebugString() << endl; + MLOG(ss.str()); +#endif + } + ////////////////// + // INFERENCE + ////////////////// + output = session->infer(input); + auto outputsCount = output.size(); + RET_CHECK(outputsCount == cc->Outputs().GetTags().size()); + for (const auto& tag : cc->Outputs().GetTags()) { + std::string tensorName; + auto it = options.tag_to_output_tensor_names().find(tag); + if (it == options.tag_to_output_tensor_names().end()) { + tensorName = tag; + } else { + tensorName = it->second; + } + auto tensorIt = output.find(tensorName); + if (tensorIt == output.end()) { + // TODO + throw 54; + } + cc->Outputs().Tag(tag).Add( + new ov::Tensor(tensorIt->second), + cc->InputTimestamp()); + } + MLOG("Main process end"); + return absl::OkStatus(); + } +}; + +REGISTER_CALCULATOR(ModelAPISideFeedCalculator); +} // namespace mediapipe diff --git a/src/mediapipe_calculators/modelapiovmsinferencecalculator.proto b/src/mediapipe_calculators/modelapiovmsinferencecalculator.proto new file mode 100644 index 0000000000..ce5e5371ba --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmsinferencecalculator.proto @@ -0,0 +1,30 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +syntax = "proto2"; +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message ModelAPIInferenceCalculatorOptions { + extend mediapipe.CalculatorOptions { + // https://github.com/google/mediapipe/issues/634 have to be unique in app + // no rule to obtain this + optional ModelAPIInferenceCalculatorOptions ext = 113473743; + } + map tag_to_input_tensor_names = 1; + map tag_to_output_tensor_names = 2; +} diff --git a/src/mediapipe_calculators/modelapiovmssessioncalculator.cc b/src/mediapipe_calculators/modelapiovmssessioncalculator.cc new file mode 100644 index 0000000000..9e78f892d5 --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmssessioncalculator.cc @@ -0,0 +1,99 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
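The two map fields above are optional; as seen in the inference calculator's `Process()`, a stream tag that has no mapping is used directly as the tensor name. A small sketch of that resolution rule (the helper name is illustrative):

    #include <string>
    #include "google/protobuf/map.h"

    // If the tag is present in the options map, use the mapped tensor name;
    // otherwise the tag itself doubles as the tensor name.
    static std::string resolveTensorName(
        const google::protobuf::Map<std::string, std::string>& tagToName,
        const std::string& tag) {
        auto it = tagToName.find(tag);
        return it == tagToName.end() ? tag : it->second;
    }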
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include + +#include + +#include "../ovms.h" // NOLINT +#include "../stringutils.hpp" // TODO dispose +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "modelapiovmsadapter.hpp" +#include "modelapiovmsadapterwrapper.hpp" +#include "src/mediapipe_calculators/modelapiovmssessioncalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib +namespace mediapipe { +#define MLOG(A) LOG(ERROR) << __FILE__ << ":" << __LINE__ << " " << A << std::endl; + +using ovms::AdapterWrapper; +using ovms::OVMSInferenceAdapter; +using std::endl; + +const std::string SESSION_TAG{"SESSION"}; + +class ModelAPISessionCalculator : public CalculatorBase { + std::unique_ptr adapter; + std::unordered_map outputNameToTag; + +public: + static absl::Status GetContract(CalculatorContract* cc) { + MLOG("Session GetContract start"); + RET_CHECK(cc->Inputs().GetTags().empty()); + RET_CHECK(cc->Outputs().GetTags().empty()); + cc->OutputSidePackets().Tag(SESSION_TAG.c_str()).Set(); + const auto& options = cc->Options(); + RET_CHECK(!options.servable_name().empty()); + MLOG("Session GetContract middle"); + // TODO validate version from string + // TODO validate service url format + // this is for later support for remote server inference + MLOG("Session GetContract end"); + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) final { + return absl::OkStatus(); + } + absl::Status Open(CalculatorContext* cc) final { + MLOG("Session Open start"); + for (CollectionItemId id = cc->Inputs().BeginId(); + id < cc->Inputs().EndId(); ++id) { + if (!cc->Inputs().Get(id).Header().IsEmpty()) { + cc->Outputs().Get(id).SetHeader(cc->Inputs().Get(id).Header()); + } + } + if (cc->OutputSidePackets().NumEntries() != 0) { + for (CollectionItemId id = cc->InputSidePackets().BeginId(); + id < cc->InputSidePackets().EndId(); ++id) { + cc->OutputSidePackets().Get(id).Set(cc->InputSidePackets().Get(id)); + } + } + cc->SetOffset(TimestampDiff(0)); + + const auto& options = cc->Options(); + const std::string& servableName = options.servable_name(); + const std::string& servableVersionStr = options.servable_version(); + auto servableVersionOpt = ::ovms::stou32(servableVersionStr); + // 0 means default + uint32_t servableVersion = servableVersionOpt.value_or(0); + auto session = std::make_unique(new OVMSInferenceAdapter(servableName, servableVersion)); + MLOG("Session create adapter"); + cc->OutputSidePackets().Tag(SESSION_TAG.c_str()).Set(Adopt(session.release())); + MLOG("SessionOpen end"); + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) final { + MLOG("SessionProcess"); + return absl::OkStatus(); + } +}; + +REGISTER_CALCULATOR(ModelAPISessionCalculator); +} // namespace mediapipe diff --git a/src/mediapipe_calculators/modelapiovmssessioncalculator.proto 
b/src/mediapipe_calculators/modelapiovmssessioncalculator.proto new file mode 100644 index 0000000000..6d6e624c4b --- /dev/null +++ b/src/mediapipe_calculators/modelapiovmssessioncalculator.proto @@ -0,0 +1,32 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +syntax = "proto2"; +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message ModelAPIOVMSSessionCalculatorOptions { + extend mediapipe.CalculatorOptions { + // https://github.com/google/mediapipe/issues/634 have to be unique in app + // no rule to obtain this + optional ModelAPIOVMSSessionCalculatorOptions ext = 113473744; + } + required string servable_name = 1; + required string servable_version = 2; + // service_url: "13.21.212.171:9718" + optional string service_url = 3; +} diff --git a/src/mediapipe_calculators/ovms_calculator.cc b/src/mediapipe_calculators/ovms_calculator.cc new file mode 100644 index 0000000000..727a9f61c4 --- /dev/null +++ b/src/mediapipe_calculators/ovms_calculator.cc @@ -0,0 +1,279 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include + +#include + +#include "../ovms.h" // NOLINT +#include "../stringutils.hpp" // TODO dispose +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +#include "src/mediapipe_calculators/ovmscalculator.pb.h" +// here we need to decide if we have several calculators (1 for OVMS repository, 1-N inside mediapipe) +// for the one inside OVMS repo it makes sense to reuse code from ovms lib +namespace mediapipe { +#define MLOG(A) LOG(ERROR) << __FILE__ << ":" << __LINE__ << " " << A << std::endl; + +using std::endl; + +namespace { +#define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + uint32_t code = 0; \ + const char* msg = nullptr; \ + OVMS_StatusGetCode(err, &code); \ + OVMS_StatusGetDetails(err, &msg); \ + LOG(ERROR) << "Error encountred in OVMSCalculator:" << msg << " code: " << code; \ + OVMS_StatusDelete(err); \ + RET_CHECK(err == nullptr); \ + } \ + } +#define CREATE_GUARD(GUARD_NAME, CAPI_TYPE, CAPI_PTR) \ + std::unique_ptr GUARD_NAME(CAPI_PTR, &(CAPI_TYPE##Delete)); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype) { + static std::unordered_map precisionMap{ + {OVMS_DATATYPE_FP64, ov::element::Type_t::f64}, + {OVMS_DATATYPE_FP32, ov::element::Type_t::f32}, + {OVMS_DATATYPE_FP16, ov::element::Type_t::f16}, + {OVMS_DATATYPE_I64, ov::element::Type_t::i64}, + {OVMS_DATATYPE_I32, ov::element::Type_t::i32}, + {OVMS_DATATYPE_I16, ov::element::Type_t::i16}, + {OVMS_DATATYPE_I8, ov::element::Type_t::i8}, + {OVMS_DATATYPE_I4, ov::element::Type_t::i4}, + {OVMS_DATATYPE_U64, ov::element::Type_t::u64}, + {OVMS_DATATYPE_U32, ov::element::Type_t::u32}, + {OVMS_DATATYPE_U16, ov::element::Type_t::u16}, + {OVMS_DATATYPE_U8, ov::element::Type_t::u8}, + {OVMS_DATATYPE_U4, ov::element::Type_t::u4}, + {OVMS_DATATYPE_U1, ov::element::Type_t::u1}, + {OVMS_DATATYPE_BOOL, ov::element::Type_t::boolean}, + {OVMS_DATATYPE_BF16, ov::element::Type_t::bf16}, + {OVMS_DATATYPE_UNDEFINED, ov::element::Type_t::undefined}, + {OVMS_DATATYPE_DYNAMIC, ov::element::Type_t::dynamic} + // {OVMS_DATATYPE_MIXED, ov::element::Type_t::MIXED}, + // {OVMS_DATATYPE_Q78, ov::element::Type_t::Q78}, + // {OVMS_DATATYPE_BIN, ov::element::Type_t::BIN}, + // {OVMS_DATATYPE_CUSTOM, ov::element::Type_t::CUSTOM + }; + auto it = precisionMap.find(datatype); + if (it == precisionMap.end()) { + return ov::element::Type_t::undefined; + } + return it->second; +} + +static OVMS_DataType OVPrecision2CAPI(ov::element::Type_t datatype) { + static std::unordered_map precisionMap{ + {ov::element::Type_t::f64, OVMS_DATATYPE_FP64}, + {ov::element::Type_t::f32, OVMS_DATATYPE_FP32}, + {ov::element::Type_t::f16, OVMS_DATATYPE_FP16}, + {ov::element::Type_t::i64, OVMS_DATATYPE_I64}, + {ov::element::Type_t::i32, OVMS_DATATYPE_I32}, + {ov::element::Type_t::i16, OVMS_DATATYPE_I16}, + {ov::element::Type_t::i8, OVMS_DATATYPE_I8}, + {ov::element::Type_t::i4, OVMS_DATATYPE_I4}, + {ov::element::Type_t::u64, OVMS_DATATYPE_U64}, + {ov::element::Type_t::u32, OVMS_DATATYPE_U32}, + {ov::element::Type_t::u16, OVMS_DATATYPE_U16}, + {ov::element::Type_t::u8, OVMS_DATATYPE_U8}, + {ov::element::Type_t::u4, OVMS_DATATYPE_U4}, + {ov::element::Type_t::u1, OVMS_DATATYPE_U1}, + {ov::element::Type_t::boolean, OVMS_DATATYPE_BOOL}, + {ov::element::Type_t::bf16, 
OVMS_DATATYPE_BF16}, + {ov::element::Type_t::undefined, OVMS_DATATYPE_UNDEFINED}, + {ov::element::Type_t::dynamic, OVMS_DATATYPE_DYNAMIC} + // {ov::element::Type_t::, OVMS_DATATYPE_MIXEDMIXED}, + // {ov::element::Type_t::, OVMS_DATATYPE_Q78Q78}, + // {ov::element::Type_t::, OVMS_DATATYPE_BINBIN}, + // {ov::element::Type_t::, OVMS_DATATYPE_CUSTOMCUSTOM + }; + auto it = precisionMap.find(datatype); + if (it == precisionMap.end()) { + return OVMS_DATATYPE_UNDEFINED; + } + return it->second; +} + +static ov::Tensor* makeOvTensor(OVMS_DataType datatype, const int64_t* shape, uint32_t dimCount, const void* voutputData, size_t bytesize) { + ov::Shape ovShape; + for (size_t i = 0; i < dimCount; ++i) { + ovShape.push_back(shape[i]); + } + // here we make copy of underlying OVMS repsonse tensor + ov::Tensor* output = new ov::Tensor(CAPI2OVPrecision(datatype), ovShape); + std::memcpy(output->data(), voutputData, bytesize); + return output; +} +static ov::Tensor makeOvTensorO(OVMS_DataType datatype, const int64_t* shape, uint32_t dimCount, const void* voutputData, size_t bytesize) { + ov::Shape ovShape; + for (size_t i = 0; i < dimCount; ++i) { + ovShape.push_back(shape[i]); + } + // here we make copy of underlying OVMS repsonse tensor + ov::Tensor output(CAPI2OVPrecision(datatype), ovShape); + std::memcpy(output.data(), voutputData, bytesize); + return output; +} +} // namespace + +class OVMSOVCalculator : public CalculatorBase { + OVMS_Server* cserver{nullptr}; + OVMS_ServerSettings* _serverSettings{nullptr}; + OVMS_ModelsSettings* _modelsSettings{nullptr}; + std::unordered_map outputNameToTag; + +public: + static absl::Status GetContract(CalculatorContract* cc) { + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + for (const std::string& tag : cc->Inputs().GetTags()) { + cc->Inputs().Tag(tag).Set(); + } + for (const std::string& tag : cc->Outputs().GetTags()) { + cc->Outputs().Tag(tag).Set(); + } + const auto& options = cc->Options(); + RET_CHECK(!options.servable_name().empty()); + // TODO validate version from string + // TODO validate service url format + RET_CHECK(options.config_path().empty() || + options.service_url().empty()); + // TODO validate tag_to_tensor maps so that key fulfill regex + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) final { + const auto& options = cc->Options(); + if (!options.config_path().empty()) { + OVMS_ModelsSettingsDelete(_modelsSettings); + OVMS_ServerSettingsDelete(_serverSettings); + // Close is called on input node and output node in initial pipeline + // Commented out since for now this happens twice in 2 nodes graph. Server will close + // OVMS_ServerDelete(cserver); TODO this should happen onlif graph is used once + // moreover we may need several ovms calculators use in graph each providing its own model? how to handle then different model inputs, as wel as config? 
+ } + return absl::OkStatus(); + } + absl::Status Open(CalculatorContext* cc) final { + for (CollectionItemId id = cc->Inputs().BeginId(); id < cc->Inputs().EndId(); ++id) { + if (!cc->Inputs().Get(id).Header().IsEmpty()) { + cc->Outputs().Get(id).SetHeader(cc->Inputs().Get(id).Header()); + } + } + if (cc->OutputSidePackets().NumEntries() != 0) { + for (CollectionItemId id = cc->InputSidePackets().BeginId(); id < cc->InputSidePackets().EndId(); ++id) { + cc->OutputSidePackets().Get(id).Set(cc->InputSidePackets().Get(id)); + } + } + cc->SetOffset(TimestampDiff(0)); + + const auto& options = cc->Options(); + OVMS_ServerNew(&cserver); + if (!options.config_path().empty()) { + OVMS_ServerSettingsNew(&_serverSettings); + OVMS_ModelsSettingsNew(&_modelsSettings); + OVMS_ModelsSettingsSetConfigPath(_modelsSettings, options.config_path().c_str()); + OVMS_ServerSettingsSetLogLevel(_serverSettings, OVMS_LOG_DEBUG); + OVMS_ServerStartFromConfigurationFile(cserver, _serverSettings, _modelsSettings); + } + for (const auto& [key, value] : options.tag_to_output_tensor_names()) { + outputNameToTag[value] = key; + } + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) final { + const auto& options = cc->Options(); + ///////////////////// + // PREPARE REQUEST + ///////////////////// + OVMS_InferenceRequest* request{nullptr}; + auto servableVersionOpt = ovms::stou32(options.servable_version().c_str()); + uint64_t servableVersion = servableVersionOpt.value_or(0); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, options.servable_name().c_str(), servableVersion)); + CREATE_GUARD(requestGuard, OVMS_InferenceRequest, request); + + // PREPARE EACH INPUT + // extract single tensor + const auto inputTagInputMap = options.tag_to_input_tensor_names(); + const auto inputTagOutputMap = options.tag_to_output_tensor_names(); + for (const std::string& tag : cc->Inputs().GetTags()) { + // TODO validate existence of tag key in map + const char* realInputName = inputTagInputMap.at(tag).c_str(); + + auto& packet = cc->Inputs().Tag(tag).Get(); + ov::Tensor input_tensor(packet); + const float* input_tensor_access = reinterpret_cast(input_tensor.data()); + std::stringstream ss; + ss << "Calculator received tensor: [ "; + for (int x = 0; x < 10; ++x) { + ss << input_tensor_access[x] << " "; + } + ss << " ] timestamp: " << cc->InputTimestamp().DebugString(); + MLOG(ss.str()); + const auto& ovInputShape = input_tensor.get_shape(); + std::vector inputShape(ovInputShape.begin(), ovInputShape.end()); // TODO ensure ov tensors shapes conversions return error in all calcs + OVMS_DataType inputDataType = OVPrecision2CAPI(input_tensor.get_element_type()); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, realInputName, inputDataType, inputShape.data(), inputShape.size())); + const uint32_t notUsedNum = 0; + // TODO handle hardcoded buffertype, notUsedNum additional options? side packets? 
+ ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, + realInputName, + reinterpret_cast(input_tensor.data()), + input_tensor.get_byte_size(), + OVMS_BUFFERTYPE_CPU, + notUsedNum)); + } + ////////////////// + // INFERENCE + ////////////////// + OVMS_InferenceResponse* response = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response)); + CREATE_GUARD(responseGuard, OVMS_InferenceResponse, response); + // verify GetOutputCount + uint32_t outputCount = 42; + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutputCount(response, &outputCount)); + RET_CHECK(outputCount == cc->Outputs().GetTags().size()); + uint32_t parameterCount = 42; + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetParameterCount(response, ¶meterCount)); + // TODO handle output filtering. Graph definition could suggest + // that we are not interested in all outputs from OVMS Inference + const void* voutputData; + size_t bytesize = 42; + OVMS_DataType datatype = (OVMS_DataType)199; + const int64_t* shape{nullptr}; + size_t dimCount = 42; + OVMS_BufferType bufferType = (OVMS_BufferType)199; + uint32_t deviceId = 42; + const char* outputName{nullptr}; + for (size_t i = 0; i < outputCount; ++i) { + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutput(response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId)); + ov::Tensor* outOvTensor = makeOvTensor(datatype, shape, dimCount, voutputData, bytesize); + cc->Outputs().Tag(outputNameToTag.at(outputName)).Add(outOvTensor, cc->InputTimestamp()); + } + return absl::OkStatus(); + } +}; +#pragma GCC diagnostic pop +REGISTER_CALCULATOR(OVMSOVCalculator); +} // namespace mediapipe diff --git a/src/mediapipe_calculators/ovmscalculator.proto b/src/mediapipe_calculators/ovmscalculator.proto new file mode 100644 index 0000000000..3d2920560a --- /dev/null +++ b/src/mediapipe_calculators/ovmscalculator.proto @@ -0,0 +1,49 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
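Unlike the session/inference calculator pair, `OVMSOVCalculator` above owns its server: when `config_path` is set, it starts an embedded OVMS instance inside `Open()`. Below is a condensed sketch of that startup sequence; the helper name and the status check are additions for clarity (the calculator itself ignores the returned statuses), and the caller is assumed to keep and later free the settings objects, as the calculator does in `Close()`.

    #include <cstdint>
    #include <cstdio>
    #include "ovms.h"

    // Start an embedded OVMS instance from a configuration file, mirroring
    // OVMSOVCalculator::Open(); returns 0 on success.
    static int startEmbeddedServer(OVMS_Server** server,
        OVMS_ServerSettings** serverSettings,
        OVMS_ModelsSettings** modelsSettings,
        const char* configPath) {
        OVMS_ServerNew(server);
        OVMS_ServerSettingsNew(serverSettings);
        OVMS_ModelsSettingsNew(modelsSettings);
        OVMS_ModelsSettingsSetConfigPath(*modelsSettings, configPath);
        OVMS_ServerSettingsSetLogLevel(*serverSettings, OVMS_LOG_DEBUG);
        OVMS_Status* status = OVMS_ServerStartFromConfigurationFile(*server, *serverSettings, *modelsSettings);
        if (status != nullptr) {
            uint32_t code = 0;
            const char* details = nullptr;
            OVMS_StatusGetCode(status, &code);
            OVMS_StatusGetDetails(status, &details);
            fprintf(stderr, "Server start failed: %s (code %u)\n", details, code);
            OVMS_StatusDelete(status);
            return 1;
        }
        return 0;
    }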
+//***************************************************************************** + +syntax = "proto2"; +package mediapipe; + +import "mediapipe/framework/calculator.proto"; + +message OVMSCalculatorOptions { + extend mediapipe.CalculatorOptions { + // https://github.com/google/mediapipe/issues/634 have to be unique in app + // no rule to obtain this + optional OVMSCalculatorOptions ext = 113473742; + } + required string servable_name = 1; + // [1-9][0-9]* + required string servable_version = 2; // TODO decide if follow KFS proto or use int + // keep mappings separately in case input/output tensor have the same name +/* + tag_to_input_tensor_names {$ + key: "INPUT"$ + value: "input_tensor/input_features:0"$ + }$ + tag_to_input_tensor_names {$ + key: "DATA"$ + value: "data_tensor"$ + }$ +*/ + map tag_to_input_tensor_names = 3; + map tag_to_output_tensor_names = 4; + // service_url: "13.21.212.171:9718" + optional string service_url = 5; + // config_path: "/models/config.json" + // TODO decide if it should be at all possible since OVMS does not have a C-API way to stop and then restart + optional string config_path = 6; +} diff --git a/src/mediapipe_internal/mediapipefactory.cpp b/src/mediapipe_internal/mediapipefactory.cpp new file mode 100644 index 0000000000..1c00e82b7d --- /dev/null +++ b/src/mediapipe_internal/mediapipefactory.cpp @@ -0,0 +1,122 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include "mediapipefactory.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wall" +#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" +#pragma GCC diagnostic pop +#include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../logging.hpp" +#include "../modelmanager.hpp" +#include "../status.hpp" +#include "mediapipegraphdefinition.hpp" + +namespace ovms { + +Status MediapipeFactory::createDefinition(const std::string& pipelineName, + const MediapipeGraphConfig& config, + ModelManager& manager) { + if (definitionExists(pipelineName)) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Mediapipe graph definition: {} is already created", pipelineName); + return StatusCode::PIPELINE_DEFINITION_ALREADY_EXIST; + } + std::shared_ptr graphDefinition = std::make_shared(pipelineName, config, manager.getMetricRegistry(), &manager.getMetricConfig()); + auto stat = graphDefinition->validate(manager); + std::unique_lock lock(definitionsMtx); + definitions.insert({pipelineName, std::move(graphDefinition)}); // TODO check if inserted + return stat; +} + +bool MediapipeFactory::definitionExists(const std::string& name) const { + std::shared_lock lock(definitionsMtx); + return this->definitions.find(name) != this->definitions.end(); +} + +MediapipeGraphDefinition* MediapipeFactory::findDefinitionByName(const std::string& name) const { + std::shared_lock lock(definitionsMtx); + auto it = definitions.find(name); + if (it == std::end(definitions)) { + return nullptr; + } else { + return it->second.get(); + } +} + +Status MediapipeFactory::reloadDefinition(const std::string& name, + const MediapipeGraphConfig& config, + ModelManager& manager) { + auto mgd = findDefinitionByName(name); + if (mgd == nullptr) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Requested to reload mediapipe graph definition but it does not exist: {}", name); + return StatusCode::INTERNAL_ERROR; + } + SPDLOG_LOGGER_INFO(modelmanager_logger, "Reloading mediapipe graph: {}", name); + return mgd->reload(manager, config); +} + +Status MediapipeFactory::create(std::shared_ptr& pipeline, + const std::string& name, + const KFSRequest* request, + KFSResponse* response, + ModelManager& manager) const { + std::shared_lock lock(definitionsMtx); + if (!definitionExists(name)) { + SPDLOG_LOGGER_DEBUG(dag_executor_logger, "Mediapipe with requested name: {} does not exist", name); + return StatusCode::MEDIAPIPE_DEFINITION_NAME_MISSING; + } + auto& definition = *definitions.at(name); + auto status = definition.create(pipeline, request, response); + return status; +} + +void MediapipeFactory::retireOtherThan(std::set&& graphsInConfigFile, ModelManager& manager) { + std::for_each(definitions.begin(), + definitions.end(), + [&graphsInConfigFile, &manager](auto& nameDefinitionPair) { + if (graphsInConfigFile.find(nameDefinitionPair.second->getName()) == graphsInConfigFile.end() && nameDefinitionPair.second->getStateCode() != PipelineDefinitionStateCode::RETIRED) { + nameDefinitionPair.second->retire(manager); + } + }); +} + +Status MediapipeFactory::revalidatePipelines(ModelManager&) { + // TODO potentially not needed if we do not validate presence of models required in pbtxt + SPDLOG_LOGGER_WARN(modelmanager_logger, "revalidation of mediapipe graphs not implemented yet"); + return StatusCode::OK; +} + +const std::vector 
MediapipeFactory::getMediapipePipelinesNames() const { + std::vector names; + std::shared_lock lock(definitionsMtx); + names.reserve(definitions.size()); + for (auto& [name, definition] : definitions) { + names.push_back(definition->getName()); + } + return names; +} + +MediapipeFactory::~MediapipeFactory() = default; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipefactory.hpp b/src/mediapipe_internal/mediapipefactory.hpp new file mode 100644 index 0000000000..5c269987fc --- /dev/null +++ b/src/mediapipe_internal/mediapipefactory.hpp @@ -0,0 +1,78 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wall" +#include "tensorflow_serving/apis/prediction_service.grpc.pb.h" +#pragma GCC diagnostic pop +#include "../kfs_frontend/kfs_grpc_inference_service.hpp" + +namespace ovms { + +class ModelManager; +class Status; +class MediapipeGraphConfig; +class MediapipeGraphDefinition; +class MediapipeGraphExecutor; + +class MediapipeFactory { + std::map> definitions; + mutable std::shared_mutex definitionsMtx; + +public: + Status createDefinition(const std::string& pipelineName, + const MediapipeGraphConfig& config, + ModelManager& manager); + + bool definitionExists(const std::string& name) const; + +private: + template + Status createInternal(std::unique_ptr& pipeline, + const std::string& name, + const RequestType* request, + ResponseType* response, + ModelManager& manager) const; + +public: + Status create(std::shared_ptr& pipeline, + const std::string& name, + const KFSRequest* request, + KFSResponse* response, + ModelManager& manager) const; + + MediapipeGraphDefinition* findDefinitionByName(const std::string& name) const; + Status reloadDefinition(const std::string& pipelineName, + const MediapipeGraphConfig& config, + ModelManager& manager); + + void retireOtherThan(std::set&& pipelinesInConfigFile, ModelManager& manager); + Status revalidatePipelines(ModelManager&); + const std::vector getMediapipePipelinesNames() const; + ~MediapipeFactory(); +}; + +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphconfig.cpp b/src/mediapipe_internal/mediapipegraphconfig.cpp new file mode 100644 index 0000000000..d5779d94c3 --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphconfig.cpp @@ -0,0 +1,33 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include "mediapipegraphconfig.hpp" + +#include "../filesystem.hpp" + +namespace ovms { +void MediapipeGraphConfig::setBasePath(const std::string& basePath) { + FileSystem::setPath(this->basePath, basePath, this->rootDirectoryPath); +} + +void MediapipeGraphConfig::setGraphPath(const std::string& graphPath) { + FileSystem::setPath(this->graphPath, graphPath, this->basePath); +} + +void MediapipeGraphConfig::setSubconfigPath(const std::string& subconfigPath) { + FileSystem::setPath(this->subconfigPath, subconfigPath, this->basePath); +} + +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphconfig.hpp b/src/mediapipe_internal/mediapipegraphconfig.hpp new file mode 100644 index 0000000000..15ecd786e9 --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphconfig.hpp @@ -0,0 +1,245 @@ +//***************************************************************************** +// Copyright 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include "../status.hpp" + +namespace ovms { + +/** + * @brief This class represents Mediapie Graph configuration + */ +class MediapipeGraphConfig { +private: + /** + * @brief Mediapipe Graph Name + */ + std::string graphName; + + /** + * @brief Mediapipe Base Path + */ + std::string basePath; + + /** + * @brief Mediapipe Graph Path + */ + std::string graphPath; + + /** + * @brief Flag determing should we pass whole KFSrequest + */ + bool passKfsRequest; + + /** + * @brief Json config directory path + */ + std::string rootDirectoryPath; + + /** + * @brief Json config path + */ + std::string subconfigPath; + +public: + /** + * @brief Construct a new Mediapie Graph configuration object + * + * @param name + * @param basePath + * @param graphPath + * @param passKfsRequest + * @param subconfigPath + */ + MediapipeGraphConfig(const std::string& graphName = "", + const std::string& basePath = "", + const std::string& graphPath = "", + const bool passKfsRequest = false, + const std::string& subconfigPath = "") : + graphName(graphName), + basePath(basePath), + graphPath(graphPath), + passKfsRequest(passKfsRequest), + subconfigPath(subconfigPath) { + } + + void clear() { + graphName.clear(); + graphPath.clear(); + passKfsRequest = false; + } + + /** + * @brief Get the Graph name + * + * @return const std::string& + */ + const std::string& getGraphName() const { + return this->graphName; + } + + /** + * @brief Set the Graph name + * + * @param name + */ + void setGraphName(const std::string& graphName) { + this->graphName = graphName; + } + + /** + * @brief Get the Graph Path + * + * @return const std::string& + */ + const std::string& getGraphPath() const { + return this->graphPath; + } + + /** + * @brief Get the Base Path + * + * @return const std::string& + */ + const std::string& getBasePath() const { + return this->basePath; + } + + /** + * @brief Set the Graph Path + * + * @param graphPath + */ + void setGraphPath(const std::string& graphPath); + + /** + * @brief Set the Base Path + * + * @param basePath + */ + void setBasePath(const std::string& basePath); + + /** + * @brief Get the ModelsConfig Path + * + * @return const std::string& + */ + const std::string& getSubconfigPath() const { + return this->subconfigPath; + } + + /** + * @brief Set the Models Config Path + * + * @param subconfigPath + */ + void setSubconfigPath(const std::string& subconfigPath); + + /** + * @brief Set root directory path + * + * @param rootDirectoryPath + */ + void setRootDirectoryPath(const std::string& rootDirectoryPath) { + this->rootDirectoryPath = rootDirectoryPath; + } + + /** + * @brief Get the root directory path + * + * @return const std::string& + */ + const std::string& getRootDirectoryPath() const { + return this->rootDirectoryPath; + } + + /** + * @brief Get the passKfsRequest + * + * @return const bool + */ + const bool getPassKfsRequestFlag() const { + return this->passKfsRequest; + } + + /** + * @brief Set the PassKfsRequestFlag + * + * @param passKfsRequest + */ + void setPassKfsRequestFlag(const bool passKfsRequest) { + this->passKfsRequest = passKfsRequest; + } + + /** + * @brief Parses all settings from a JSON node + * + * @return Status + */ + Status parseNode(const rapidjson::Value& v) { + try { + this->setGraphName(v["name"].GetString()); + if (v.HasMember("base_path")) { + std::string providedBasePath(v["base_path"].GetString()); + 
if (providedBasePath.back() == '/') + this->setBasePath(providedBasePath); + else + this->setBasePath(providedBasePath + "/"); + } else { + if (!getRootDirectoryPath().empty()) { + this->setBasePath(getRootDirectoryPath()); + SPDLOG_DEBUG("base_path not defined in config so it will be set to default based on main config directory: {}", this->getBasePath()); + } else { + SPDLOG_ERROR("Mediapipe {} root directory path is not set.", getGraphName()); + return StatusCode::INTERNAL_ERROR; + } + } + if (v.HasMember("graph_path")) { + this->setGraphPath(v["graph_path"].GetString()); + } else { + this->setGraphPath(basePath + "graph.pbtxt"); + SPDLOG_DEBUG("graph_path not defined in config so it will be set to default based on base_path and graph name: {}", this->getGraphPath()); + } + if (v.HasMember("graph_pass_kfs_request")) + this->setPassKfsRequestFlag(v["graph_pass_kfs_request"].GetBool()); + else + this->setPassKfsRequestFlag(false); + if (v.HasMember("subconfig")) { + this->setSubconfigPath(v["subconfig"].GetString()); + } else { + std::string defaultSubconfigPath = getBasePath() + "subconfig.json"; + SPDLOG_DEBUG("No subconfig path was provided for graph: {} so default subconfig file: {} will be loaded.", getGraphName(), defaultSubconfigPath); + this->setSubconfigPath(defaultSubconfigPath); + } + } catch (std::logic_error& e) { + SPDLOG_DEBUG("Relative path error: {}", e.what()); + return StatusCode::INTERNAL_ERROR; + } catch (...) { + SPDLOG_ERROR("There was an error parsing the mediapipe graph config"); + return StatusCode::JSON_INVALID; + } + return StatusCode::OK; + } +}; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp new file mode 100644 index 0000000000..df80e1b2e7 --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp @@ -0,0 +1,215 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
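For reference, this is the shape of a single mediapipe_config_list entry that parseNode() above accepts, driven directly through rapidjson. Only "name" has no fallback; base_path defaults to the config directory, graph_path to graph.pbtxt, subconfig to subconfig.json, and graph_pass_kfs_request to false. The graph name and paths below are hypothetical, and the include path assumes the sketch sits next to the new sources:

#include <rapidjson/document.h>

#include "mediapipegraphconfig.hpp"

int main() {
    // Hypothetical entry taken from the "mediapipe_config_list" section of config.json.
    const char* entry = R"({
        "name": "dummyAddGraph",
        "base_path": "graphs/add/",
        "graph_path": "graph.pbtxt",
        "graph_pass_kfs_request": false,
        "subconfig": "subconfig.json"
    })";
    rapidjson::Document node;
    node.Parse(entry);
    ovms::MediapipeGraphConfig config;
    config.setRootDirectoryPath("/opt/ovms/");  // normally derived from the config.json location
    auto status = config.parseNode(node);       // fills graphName, basePath, graphPath, subconfigPath
    return status.ok() ? 0 : 1;
}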
+//***************************************************************************** +#include "mediapipegraphdefinition.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "../deserialization.hpp" +#include "../execution_context.hpp" +#include "../kfs_frontend/kfs_utils.hpp" +#include "../metric.hpp" +#include "../modelmanager.hpp" +#include "../ov_utils.hpp" +#include "../serialization.hpp" +#include "../status.hpp" +#include "../stringutils.hpp" +#include "../tensorinfo.hpp" +#include "../timer.hpp" +#include "../version.hpp" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipegraphexecutor.hpp" + +namespace ovms { +MediapipeGraphConfig MediapipeGraphDefinition::MGC; + +const std::string MediapipeGraphDefinition::SCHEDULER_CLASS_NAME{"Mediapipe"}; + +const tensor_map_t MediapipeGraphDefinition::getInputsInfo() const { + std::shared_lock lock(metadataMtx); + return this->inputsInfo; +} + +const tensor_map_t MediapipeGraphDefinition::getOutputsInfo() const { + std::shared_lock lock(metadataMtx); + return this->outputsInfo; +} + +Status MediapipeGraphDefinition::validateForConfigFileExistence() { + std::ifstream ifs(this->mgconfig.getGraphPath()); + if (!ifs.is_open()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to open mediapipe graph definition: {}, file: {}\n", this->getName(), this->mgconfig.getGraphPath()); + return StatusCode::FILE_INVALID; + } + this->chosenConfig.clear(); + ifs.seekg(0, std::ios::end); + this->chosenConfig.reserve(ifs.tellg()); + ifs.seekg(0, std::ios::beg); + + this->chosenConfig.assign((std::istreambuf_iterator(ifs)), + std::istreambuf_iterator()); + return StatusCode::OK; +} + +Status MediapipeGraphDefinition::validateForConfigLoadableness() { + bool success = ::google::protobuf::TextFormat::ParseFromString(chosenConfig, &this->config); + if (!success) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } + return StatusCode::OK; +} + +Status MediapipeGraphDefinition::validate(ModelManager& manager) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Started validation of mediapipe: {}", getName()); + ValidationResultNotifier notifier(this->status, this->loadedNotify); + Status validationResult = validateForConfigFileExistence(); + if (!validationResult.ok()) { + return validationResult; + } + validationResult = validateForConfigLoadableness(); + if (!validationResult.ok()) { + return validationResult; + } + // TODO + // 3 validate 1<= outputs + // 4 validate 1<= inputs + // 5 validate no side_packets? 
+ ::mediapipe::CalculatorGraphConfig proto; + std::unique_lock lock(metadataMtx); + auto status = createInputsInfo(); + if (!status.ok()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create inputs info for mediapipe graph definition: {}", getName()); + return status; + } + status = createOutputsInfo(); + if (!status.ok()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to create outputs info for mediapipe graph definition: {}", getName()); + return status; + } + lock.unlock(); + notifier.passed = true; + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Finished validation of mediapipe: {}", getName()); + SPDLOG_LOGGER_INFO(modelmanager_logger, "Mediapipe: {} inputs: {}", getName(), getTensorMapString(inputsInfo)); + SPDLOG_LOGGER_INFO(modelmanager_logger, "Mediapipe: {} inputs: {}", getName(), outputsInfo.size()); + SPDLOG_LOGGER_INFO(modelmanager_logger, "Mediapipe: {} outputs: {}", getName(), getTensorMapString(outputsInfo)); + return StatusCode::OK; +} + +MediapipeGraphDefinition::MediapipeGraphDefinition(const std::string name, + const MediapipeGraphConfig& config, + MetricRegistry* registry, + const MetricConfig* metricConfig) : + name(name), + status(SCHEDULER_CLASS_NAME, this->name) { + mgconfig = config; +} + +Status MediapipeGraphDefinition::createInputsInfo() { + inputsInfo.clear(); + for (auto& name : config.input_stream()) { + inputsInfo.insert({name, TensorInfo::getUnspecifiedTensorInfo()}); + } + return StatusCode::OK; +} + +Status MediapipeGraphDefinition::createOutputsInfo() { + outputsInfo.clear(); + for (auto& name : this->config.output_stream()) { + outputsInfo.insert({name, TensorInfo::getUnspecifiedTensorInfo()}); + } + return StatusCode::OK; +} + +Status MediapipeGraphDefinition::create(std::shared_ptr& pipeline, const KFSRequest* request, KFSResponse* response) { + std::unique_ptr unloadGuard; + Status status = waitForLoaded(unloadGuard); + if (!status.ok()) { + SPDLOG_DEBUG("Failed to execute mediapipe graph: {} since it is not available", getName()); + return status; + } + SPDLOG_DEBUG("Creating Mediapipe graph executor: {}", getName()); + pipeline = std::make_shared(getName(), std::to_string(getVersion()), this->config); + return status; +} + +Status MediapipeGraphDefinition::reload(ModelManager& manager, const MediapipeGraphConfig& config) { + // block creating new unloadGuards + this->status.handle(ReloadEvent()); + while (requestsHandlesCounter > 0) { + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + this->mgconfig = config; + return validate(manager); +} + +void MediapipeGraphDefinition::retire(ModelManager& manager) { + this->status.handle(RetireEvent()); +} + +Status MediapipeGraphDefinition::waitForLoaded(std::unique_ptr& unloadGuard, const uint waitForLoadedTimeoutMicroseconds) { + // TODO possibly unify with DAG to share code + unloadGuard = std::make_unique(*this); + + const uint waitLoadedTimestepMicroseconds = 1000; + const uint waitCheckpoints = waitForLoadedTimeoutMicroseconds / waitLoadedTimestepMicroseconds; + uint waitCheckpointsCounter = waitCheckpoints; + std::mutex cvMtx; + std::unique_lock cvLock(cvMtx); + while (waitCheckpointsCounter-- != 0) { + if (status.isAvailable()) { + SPDLOG_DEBUG("Successfully waited for mediapipe definition: {}", getName()); + return StatusCode::OK; + } + unloadGuard.reset(); + if (!status.canEndLoaded()) { + if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { + SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended due to timeout.", getName()); + return 
StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET; + } else { + SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended since it failed to load.", getName()); + return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE; + } + } + SPDLOG_DEBUG("Waiting for available state for mediapipe: {}, with timestep: {}us timeout: {}us check count: {}", + getName(), waitLoadedTimestepMicroseconds, waitForLoadedTimeoutMicroseconds, waitCheckpointsCounter); + loadedNotify.wait_for(cvLock, + std::chrono::microseconds(waitLoadedTimestepMicroseconds), + [this]() { + return this->status.isAvailable() || + !this->status.canEndLoaded(); + }); + unloadGuard = std::make_unique<MediapipeGraphDefinitionUnloadGuard>(*this); + } + if (!status.isAvailable()) { + if (status.getStateCode() != PipelineDefinitionStateCode::RETIRED) { + SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended due to timeout.", getName()); + return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET; + } else { + SPDLOG_DEBUG("Waiting for mediapipe definition: {} ended since it failed to load.", getName()); + return StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE; + } + } + SPDLOG_DEBUG("Successfully waited for mediapipe definition: {}", getName()); + return StatusCode::OK; +} +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphdefinition.hpp b/src/mediapipe_internal/mediapipegraphdefinition.hpp new file mode 100644 index 0000000000..9ed4d70555 --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphdefinition.hpp @@ -0,0 +1,142 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
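Putting the factory and the definition together, the intended request path appears to be: resolve the graph by name, obtain a MediapipeGraphExecutor via create() (which internally runs waitForLoaded() while holding an unload guard), then call infer() on the executor. A hedged sketch of that flow from the serving side; the wrapper function and the ExecutionContext constants are assumptions, not part of this patch:

// Sketch only: serving one KServe request through the mediapipe path.
static ovms::Status serveThroughMediapipe(ovms::ModelManager& manager,
    const KFSRequest* request, KFSResponse* response) {
    std::shared_ptr<ovms::MediapipeGraphExecutor> executor;
    // createPipeline() (added to ModelManager later in this patch) forwards to
    // MediapipeFactory::create(), which waits for the AVAILABLE state under an unload guard.
    auto status = manager.createPipeline(executor, request->model_name(), request, response);
    if (!status.ok())
        return status;
    ovms::ServableMetricReporter* reporter = nullptr;  // not touched by the executor code shown here
    // ExecutionContext values are assumed; pass whatever the KFS gRPC service uses.
    return executor->infer(request, response,
        ovms::ExecutionContext(ovms::ExecutionContext::Interface::GRPC,
            ovms::ExecutionContext::Method::ModelInfer),
        reporter);
}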
+//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include "../dags/pipelinedefinitionstatus.hpp" +#include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../kfs_frontend/kfs_utils.hpp" +#include "../metric.hpp" +#include "../tensorinfo.hpp" +#include "../timer.hpp" +#include "../version.hpp" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/parse_text_proto.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipegraphconfig.hpp" + +namespace ovms { +class MetricConfig; +class MetricRegistry; +class ModelManager; +class MediapipeGraphExecutor; +class Status; + +class MediapipeGraphDefinitionUnloadGuard; + +class MediapipeGraphDefinition { + friend MediapipeGraphDefinitionUnloadGuard; + struct ValidationResultNotifier { + ValidationResultNotifier(PipelineDefinitionStatus& status, std::condition_variable& loadedNotify) : + status(status), + loadedNotify(loadedNotify) { + } + ~ValidationResultNotifier() { + if (passed) { + status.handle(ValidationPassedEvent()); + loadedNotify.notify_all(); + } else { + status.handle(ValidationFailedEvent()); + } + } + bool passed = false; + + private: + PipelineDefinitionStatus& status; + std::condition_variable& loadedNotify; + }; + +public: + MediapipeGraphDefinition(const std::string name, + const MediapipeGraphConfig& config = MGC, + MetricRegistry* registry = nullptr, + const MetricConfig* metricConfig = nullptr); + + const std::string& getName() const { return name; } + const PipelineDefinitionStatus& getStatus() const { + return this->status; + } + const PipelineDefinitionStateCode getStateCode() const { return status.getStateCode(); } + const model_version_t getVersion() const { return VERSION; } + const tensor_map_t getInputsInfo() const; + const tensor_map_t getOutputsInfo() const; + + Status create(std::shared_ptr& pipeline, const KFSRequest* request, KFSResponse* response); + + Status reload(ModelManager& manager, const MediapipeGraphConfig& config); + Status validate(ModelManager& manager); + void retire(ModelManager& manager); + + static constexpr uint64_t WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS = 500000; + static const std::string SCHEDULER_CLASS_NAME; + Status waitForLoaded(std::unique_ptr& unloadGuard, const uint waitForLoadedTimeoutMicroseconds = WAIT_FOR_LOADED_DEFAULT_TIMEOUT_MICROSECONDS); + + // Pipelines are not versioned and any available definition has constant version equal 1. 
+ static constexpr model_version_t VERSION = 1; + +protected: + Status validateForConfigFileExistence(); + Status validateForConfigLoadableness(); + +private: + Status createInputsInfo(); + Status createOutputsInfo(); + + void increaseRequestsHandlesCount() { + ++requestsHandlesCounter; + } + + void decreaseRequestsHandlesCount() { + --requestsHandlesCounter; + } + + static MediapipeGraphConfig MGC; + const std::string name; + + std::string chosenConfig; // TODO make const @atobiszei + MediapipeGraphConfig mgconfig; + ::mediapipe::CalculatorGraphConfig config; + + tensor_map_t inputsInfo; + tensor_map_t outputsInfo; + + PipelineDefinitionStatus status; + mutable std::shared_mutex metadataMtx; + std::atomic requestsHandlesCounter = 0; + std::condition_variable loadedNotify; +}; + +class MediapipeGraphDefinitionUnloadGuard { +public: + MediapipeGraphDefinitionUnloadGuard(MediapipeGraphDefinition& definition) : + definition(definition) { + definition.increaseRequestsHandlesCount(); + } + + ~MediapipeGraphDefinitionUnloadGuard() { + definition.decreaseRequestsHandlesCount(); + } + +private: + MediapipeGraphDefinition& definition; +}; +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.cpp b/src/mediapipe_internal/mediapipegraphexecutor.cpp new file mode 100644 index 0000000000..9ac50b8cc7 --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphexecutor.cpp @@ -0,0 +1,235 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
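The deserializeTensor() helper just below pairs inputs[i] with raw_input_contents[i] and rejects the request unless the buffer size equals element count times precision size, so a well-formed client has to populate both repeated fields in the same order. A small sketch of building such an input with the generated KServe protobuf setters; the tensor name, shape and FP32 payload are illustrative:

#include <cstdint>
#include <string>
#include <vector>

#include "../kfs_frontend/kfs_grpc_inference_service.hpp"  // KFSRequest alias used in this patch

// Appends one FP32 input; raw_input_contents must line up index-for-index with inputs.
static void addFp32Input(KFSRequest& request, const std::string& name,
    const std::vector<int64_t>& shape, const std::vector<float>& data) {
    auto* input = request.add_inputs();
    input->set_name(name);
    input->set_datatype("FP32");
    for (int64_t dim : shape)
        input->add_shape(dim);
    request.add_raw_input_contents()->assign(
        reinterpret_cast<const char*>(data.data()), data.size() * sizeof(float));
}

// Usage: a 1x3 FP32 tensor named "in" carries 3 elements, i.e. 12 bytes, matching the size check.
// KFSRequest request;
// request.set_model_name("dummyAddGraph");
// addFp32Input(request, "in", {1, 3}, {0.1f, 0.2f, 0.3f});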
+//***************************************************************************** +#include "mediapipegraphexecutor.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../deserialization.hpp" +#include "../execution_context.hpp" +#include "../kfs_frontend/kfs_utils.hpp" +#include "../metric.hpp" +#include "../modelmanager.hpp" +#include "../serialization.hpp" +#include "../status.hpp" +#include "../stringutils.hpp" +#include "../tensorinfo.hpp" +#include "../timer.hpp" +#include "../version.hpp" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" +#include "mediapipegraphdefinition.hpp" // for version in response + +namespace ovms { +static Status deserializeTensor(const std::string& requestedName, const std::string& requestedVersion, const KFSRequest* request, ov::Tensor& outTensor) { + auto requestInputItr = std::find_if(request->inputs().begin(), request->inputs().end(), [&requestedName](const ::KFSRequest::InferInputTensor& tensor) { return tensor.name() == requestedName; }); + if (requestInputItr == request->inputs().end()) { + std::stringstream ss; + ss << "Required input: " << requestedName; + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] Missing input with specific name - {}", request->model_name(), requestedVersion, details); + return Status(StatusCode::INVALID_MISSING_INPUT, details); + } + if (request->raw_input_contents().size() == 0 || request->raw_input_contents().size() != request->inputs().size()) { + std::stringstream ss; + ss << "Cannot find data in raw_input_content for input with name: " << requestedName; + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] Invalid message structure - {}", request->model_name(), requestedVersion, details); + return Status(StatusCode::INVALID_MESSAGE_STRUCTURE, details); + } + auto inputIndex = requestInputItr - request->inputs().begin(); + auto& bufferLocation = request->raw_input_contents().at(inputIndex); + try { + ov::Shape shape; + for (int i = 0; i < requestInputItr->shape().size(); i++) { + if (requestInputItr->shape()[i] <= 0) { + std::stringstream ss; + ss << "Negative or zero dimension size is not acceptable: " << tensorShapeToString(requestInputItr->shape()) << "; input name: " << requestedName; + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] Invalid shape - {}", request->model_name(), requestedVersion, details); + return Status(StatusCode::INVALID_SHAPE, details); + } + shape.push_back(requestInputItr->shape()[i]); + } + ov::element::Type precision = ovmsPrecisionToIE2Precision(KFSPrecisionToOvmsPrecision(requestInputItr->datatype())); + size_t expectElementsCount = ov::shape_size(shape.begin(), shape.end()); + size_t expectedBytes = precision.size() * expectElementsCount; + if (expectedBytes <= 0) { + std::stringstream ss; + ss << "Invalid precision with expected bytes equal to 0: " << requestInputItr->datatype() << "; input name: " << requestedName; + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] {}", request->model_name(), requestedVersion, details); + return Status(StatusCode::INVALID_PRECISION, details); + } + if (expectedBytes != bufferLocation.size()) { + std::stringstream ss; + ss << "Expected: " << expectedBytes << " bytes; Actual: " << bufferLocation.size() << " bytes; input name: " << requestedName; + const std::string details = ss.str(); + 
SPDLOG_DEBUG("[servable name: {} version: {}] Invalid content size of tensor proto - {}", request->model_name(), requestedVersion, details); + return Status(StatusCode::INVALID_CONTENT_SIZE, details); + } + outTensor = ov::Tensor(precision, shape, const_cast((const void*)bufferLocation.data())); + return StatusCode::OK; + } catch (const std::exception& e) { + SPDLOG_DEBUG("Kserve mediapipe request deserialization failed:{}", e.what()); + } catch (...) { + SPDLOG_DEBUG("KServe mediapipe request deserialization failed"); + } + return Status(StatusCode::INTERNAL_ERROR, "Unexpected error during Tensor creation"); +} + +MediapipeGraphExecutor::MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config) : + name(name), + version(version), + config(config) {} + +namespace { +enum : unsigned int { + INITIALIZE_GRAPH, + RUN_GRAPH, + ADD_INPUT_PACKET, + FETCH_OUTPUT, + ALL_FETCH, + TOTAL, + TIMER_END +}; +} // namespace + +static std::map createInputSidePackets(const KFSRequest* request) { + std::map inputSidePackets; + for (const auto& [name, valueChoice] : request->parameters()) { + if (valueChoice.parameter_choice_case() == inference::InferParameter::ParameterChoiceCase::kStringParam) { + inputSidePackets[name] = mediapipe::MakePacket(valueChoice.string_param()).At(mediapipe::Timestamp(0)); // TODO timestamp of side packets + } else if (valueChoice.parameter_choice_case() == inference::InferParameter::ParameterChoiceCase::kInt64Param) { + inputSidePackets[name] = mediapipe::MakePacket(valueChoice.int64_param()).At(mediapipe::Timestamp(0)); // TODO timestamp of side packets + } else if (valueChoice.parameter_choice_case() == inference::InferParameter::ParameterChoiceCase::kBoolParam) { + inputSidePackets[name] = mediapipe::MakePacket(valueChoice.bool_param()).At(mediapipe::Timestamp(0)); // TODO timestamp of side packets + } else { + SPDLOG_DEBUG("Handling parameters of different types than: bool, string, int64 is not supported"); + } + } + return inputSidePackets; +} + +Status MediapipeGraphExecutor::infer(const KFSRequest* request, KFSResponse* response, ExecutionContext executionContext, ServableMetricReporter*& reporterOut) const { + Timer timer; + SPDLOG_DEBUG("Start KServe request mediapipe graph: {} execution", request->model_name()); + ::mediapipe::CalculatorGraph graph; + auto absStatus = graph.Initialize(this->config); + if (!absStatus.ok()) { + const std::string absMessage = absStatus.ToString(); + SPDLOG_DEBUG("KServe request for mediapipe graph: {} initialization failed with message: {}", request->model_name(), absMessage); + return Status(StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, std::move(absMessage)); + } + std::unordered_map outputPollers; + for (auto& name : this->config.output_stream()) { + auto absStatusOrPoller = graph.AddOutputStreamPoller(name); + if (!absStatusOrPoller.ok()) { + const std::string absMessage = absStatusOrPoller.status().ToString(); + return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, std::move(absMessage)); + } + outputPollers.emplace(name, std::move(absStatusOrPoller).value()); + } + auto inputNames = this->config.input_stream(); + std::map inputSidePackets{createInputSidePackets(request)}; + absStatus = graph.StartRun(inputSidePackets); + if (!absStatus.ok()) { + const std::string absMessage = absStatus.ToString(); + SPDLOG_DEBUG("Failed to start mediapipe graph: {} with error: {}", request->model_name(), absMessage); + return 
Status(StatusCode::MEDIAPIPE_GRAPH_START_ERROR, std::move(absMessage)); + } + if (inputNames.size() != request->inputs().size()) { + std::stringstream ss; + ss << "Expected: " << inputNames.size() << "; Actual: " << request->inputs().size(); + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] Invalid number of inputs - {}", request->model_name(), version, details); + return Status(StatusCode::INVALID_NO_OF_INPUTS, details); + } + for (auto name : inputNames) { + SPDLOG_DEBUG("Tensor to deserialize:\"{}\"", name); + ov::Tensor input_tensor; + auto status = deserializeTensor(name, version, request, input_tensor); + if (!status.ok()) { + SPDLOG_DEBUG("Failed to deserialize tensor: {}", name); + return status; + } + absStatus = graph.AddPacketToInputStream( + name, ::mediapipe::MakePacket(std::move(input_tensor)).At(::mediapipe::Timestamp(0))); + if (!absStatus.ok()) { + const std::string absMessage = absStatus.ToString(); + SPDLOG_DEBUG("Failed to add stream: {} packet to mediapipe graph: {} with error: {}", + name, request->model_name(), absStatus.message(), absStatus.raw_code()); + return Status(StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, std::move(absMessage)); + } + absStatus = graph.CloseInputStream(name); + if (!absStatus.ok()) { + const std::string absMessage = absStatus.ToString(); + SPDLOG_DEBUG("Failed to close stream: {} of mediapipe graph: {} with error: {}", + name, request->model_name(), absMessage); + return Status(StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR, std::move(absMessage)); + } + } + // receive outputs + ::mediapipe::Packet packet; + std::set outputPollersWithReceivedPacket; + for (auto& [outputStreamName, poller] : outputPollers) { + size_t receivedOutputs = 0; + SPDLOG_DEBUG("Will wait for output stream: {} packet", outputStreamName); + while (poller.Next(&packet)) { + SPDLOG_DEBUG("Received packet from output stream: {}", outputStreamName); + auto received = packet.Get(); + auto* output = response->add_outputs(); + output->set_name(outputStreamName); + output->set_datatype( + ovmsPrecisionToKFSPrecision( + ovElementTypeToOvmsPrecision( + received.get_element_type()))); + output->clear_shape(); + for (const auto& dim : received.get_shape()) { + output->add_shape(dim); + } + response->add_raw_output_contents()->assign(reinterpret_cast(received.data()), received.get_byte_size()); + SPDLOG_TRACE("Received packet for: {} {}", outputStreamName, receivedOutputs); + outputPollersWithReceivedPacket.insert(outputStreamName); + ++receivedOutputs; + } + SPDLOG_TRACE("Received all packets for: {}", outputStreamName); + } + absStatus = graph.WaitUntilDone(); + if (!absStatus.ok()) { + const std::string absMessage = absStatus.ToString(); + SPDLOG_DEBUG("Mediapipe failed to execute: {}", absMessage); + return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, absMessage); + } + if (outputPollers.size() != outputPollersWithReceivedPacket.size()) { + SPDLOG_DEBUG("Mediapipe failed to execute. 
Failed to receive all output packets"); + return Status(StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Unknown error during mediapipe execution"); + } + SPDLOG_DEBUG("Received all output stream packets for graph: {}", request->model_name()); + response->set_model_name(name); + response->set_id(request->id()); + response->set_model_version(version); + return StatusCode::OK; +} + +} // namespace ovms diff --git a/src/mediapipe_internal/mediapipegraphexecutor.hpp b/src/mediapipe_internal/mediapipegraphexecutor.hpp new file mode 100644 index 0000000000..459b762dae --- /dev/null +++ b/src/mediapipe_internal/mediapipegraphexecutor.hpp @@ -0,0 +1,42 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include +#include +#include +#include +#include +#include + +#include "..//kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../kfs_frontend/kfs_utils.hpp" +#include "../metric.hpp" +#include "mediapipe/framework/calculator_graph.h" +#include "mediapipe/framework/port/status.h" + +namespace ovms { +class Status; + +class MediapipeGraphExecutor { + const std::string name; + const std::string version; + const ::mediapipe::CalculatorGraphConfig config; + +public: + MediapipeGraphExecutor(const std::string& name, const std::string& version, const ::mediapipe::CalculatorGraphConfig& config); + Status infer(const KFSRequest* request, KFSResponse* response, ExecutionContext executionContext, ServableMetricReporter*& reporterOut) const; +}; +} // namespace ovms diff --git a/src/model.hpp b/src/model.hpp index 1cbe26597a..be0713df56 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -25,6 +25,7 @@ #include "logging.hpp" #include "modelchangesubscription.hpp" +#include "modelconfig.hpp" #include "modelversion.hpp" namespace ov { @@ -34,7 +35,6 @@ class Core; namespace ovms { class FileSystem; class GlobalSequencesViewer; -class ModelConfig; class ModelInstance; class PipelineDefinition; class MetricConfig; diff --git a/src/model_service.cpp b/src/model_service.cpp index e90d510c8c..e72183c55f 100644 --- a/src/model_service.cpp +++ b/src/model_service.cpp @@ -25,6 +25,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "tensorflow_serving/apis/get_model_status.pb.h" #include "tensorflow_serving/apis/model_service.grpc.pb.h" #include "tensorflow_serving/apis/model_service.pb.h" @@ -33,6 +34,10 @@ #include "dags/pipelinedefinition.hpp" #include "execution_context.hpp" #include "grpc_utils.hpp" +#if (MEDIAPIPE_DISABLE == 0) +#include "mediapipe_internal/mediapipefactory.hpp" +#include "mediapipe_internal/mediapipegraphdefinition.hpp" +#endif #include "modelinstance.hpp" #include "modelmanager.hpp" #include "servablemanagermodule.hpp" @@ -106,7 +111,18 @@ Status GetModelStatusImpl::getModelStatus( 
SPDLOG_DEBUG("GetModelStatus: Model {} is missing, trying to find pipeline with such name", requested_model_name); auto pipelineDefinition = manager.getPipelineFactory().findDefinitionByName(requested_model_name); if (!pipelineDefinition) { +#if (MEDIAPIPE_DISABLE == 0) + auto mediapipeGraphDefinition = manager.getMediapipeFactory().findDefinitionByName(requested_model_name); + if (!mediapipeGraphDefinition) { + return StatusCode::MODEL_NAME_MISSING; + } + addStatusToResponse(response, mediapipeGraphDefinition->getVersion(), mediapipeGraphDefinition->getStatus()); + SPDLOG_DEBUG("model_service: response: {}", response->DebugString()); + SPDLOG_DEBUG("MODEL_STATUS created a response for {} - {}", requested_model_name, requested_version); + return StatusCode::OK; +#else return StatusCode::MODEL_NAME_MISSING; +#endif } INCREMENT_IF_ENABLED(pipelineDefinition->getMetricReporter().getGetModelStatusRequestSuccessMetric(context)); @@ -181,6 +197,21 @@ Status GetModelStatusImpl::getAllModelsStatuses(std::map& mediapipePipelineNames = manager.getMediapipeFactory().getMediapipePipelinesNames(); + for (auto const& mediapipePipelineName : mediapipePipelineNames) { + std::optional noValueModelVersion; + tensorflow::serving::GetModelStatusRequest request; + GetModelStatusImpl::createGrpcRequest(mediapipePipelineName, noValueModelVersion, &request); + tensorflow::serving::GetModelStatusResponse response; + auto status = GetModelStatusImpl::getModelStatus(&request, &response, manager, context); + if (status != StatusCode::OK) { + // Same situation like with models. + continue; + } + modelsStatusesTmp.insert({mediapipePipelineName, response}); + } +#endif modelsStatuses.merge(modelsStatusesTmp); return StatusCode::OK; diff --git a/src/model_service.hpp b/src/model_service.hpp index fafc52e8d7..f736261f21 100644 --- a/src/model_service.hpp +++ b/src/model_service.hpp @@ -23,6 +23,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "tensorflow_serving/apis/get_model_status.pb.h" #include "tensorflow_serving/apis/model_service.grpc.pb.h" #include "tensorflow_serving/apis/model_service.pb.h" diff --git a/src/modelconfig.cpp b/src/modelconfig.cpp index b3e624b648..6e29ee16b8 100644 --- a/src/modelconfig.cpp +++ b/src/modelconfig.cpp @@ -26,7 +26,6 @@ #include #include -#include "logging.hpp" #include "model_version_policy.hpp" #include "schema.hpp" #include "stringutils.hpp" @@ -504,7 +503,12 @@ Status ModelConfig::parseModelMapping() { Status ModelConfig::parseNode(const rapidjson::Value& v) { this->setName(v["name"].GetString()); - this->setBasePath(v["base_path"].GetString()); + try { + this->setBasePath(v["base_path"].GetString()); + } catch (std::logic_error& e) { + SPDLOG_DEBUG("Relative path error: {}", e.what()); + return StatusCode::INTERNAL_ERROR; + } Status firstErrorStatus = StatusCode::OK; // Check for optional parameters diff --git a/src/modelconfig.hpp b/src/modelconfig.hpp index d9e4ab5c4b..b5cd7405fe 100644 --- a/src/modelconfig.hpp +++ b/src/modelconfig.hpp @@ -26,7 +26,9 @@ #include +#include "filesystem.hpp" #include "layout_configuration.hpp" +#include "logging.hpp" #include "metric_config.hpp" #include "modelversion.hpp" #include "shape.hpp" @@ -58,6 +60,11 @@ class ModelConfig { */ std::string basePath; + /** + * @brief Json config directory path + */ + std::string rootDirectoryPath; + /** * @brief Model local destination path on disk after downloading from online storage */ @@ -303,7 +310,16 @@ class 
ModelConfig { * @param basePath */ void setBasePath(const std::string& basePath) { - this->basePath = basePath; + FileSystem::setPath(this->basePath, basePath, this->rootDirectoryPath); + } + + /** + * @brief Set root directory path + * + * @param rootDirectoryPath + */ + void setRootDirectoryPath(const std::string& rootDirectoryPath) { + this->rootDirectoryPath = rootDirectoryPath; } /** diff --git a/src/modelinstance.cpp b/src/modelinstance.cpp index 2b5bc1de6c..3089faf079 100644 --- a/src/modelinstance.cpp +++ b/src/modelinstance.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -34,6 +35,8 @@ #include "deserialization.hpp" #include "executingstreamidguard.hpp" #include "filesystem.hpp" +#include "inferencerequest.hpp" +#include "inferenceresponse.hpp" #include "layout.hpp" #include "layout_configuration.hpp" #include "logging.hpp" @@ -65,9 +68,6 @@ enum : unsigned int { namespace ovms { -const char* CPU_THROUGHPUT_STREAMS = "CPU_THROUGHPUT_STREAMS"; -const char* NIREQ = "NIREQ"; - const uint MAX_NIREQ_COUNT = 100000; const uint UNLOAD_AVAILABILITY_CHECKING_INTERVAL_MILLISECONDS = 10; @@ -319,6 +319,15 @@ static Status applyLayoutConfiguration(const ModelConfig& config, std::shared_pt return StatusCode::OK; } +const std::string RT_INFO_KEY{"model_info"}; + +ov::AnyMap ModelInstance::getRTInfo() const { + if (this->model->has_rt_info(RT_INFO_KEY)) { + return model->get_rt_info(RT_INFO_KEY); + } + return ov::AnyMap(); +} + Status ModelInstance::loadTensors(const ModelConfig& config, bool needsToApplyLayoutConfiguration, const DynamicModelParameter& parameter) { Status status = validateConfigurationAgainstNetwork(config, this->model); if (!status.ok()) { @@ -424,7 +433,7 @@ Status ModelInstance::loadInputTensors(const ModelConfig& config, const DynamicM return StatusCode::LAYOUT_INCOMPATIBLE_WITH_SHAPE; } - std::shared_ptr info = std::make_shared( + std::shared_ptr info = std::make_shared( name, mappingName, precision, @@ -432,7 +441,6 @@ Status ModelInstance::loadInputTensors(const ModelConfig& config, const DynamicM layout); SPDLOG_LOGGER_INFO(modelmanager_logger, "Input {}", info->asString()); - this->inputsInfo[info->getMappedName()] = std::move(info); } catch (const ov::Exception& e) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Failed to get input name for model:{}; version:{}; from OpenVINO with error:{}", @@ -473,7 +481,7 @@ Status ModelInstance::loadOutputTensors(const ModelConfig& config) { return StatusCode::LAYOUT_INCOMPATIBLE_WITH_SHAPE; } - std::shared_ptr info = std::make_shared( + std::shared_ptr info = std::make_shared( name, mappingName, precision, @@ -592,7 +600,7 @@ Status ModelInstance::loadOVModel() { auto& modelFile = modelFiles[0]; SPDLOG_DEBUG("Try reading model file: {}", modelFile); try { - model = loadOVModelPtr(modelFile); + this->model = loadOVModelPtr(modelFile); } catch (std::exception& e) { SPDLOG_ERROR("Error: {}; occurred during loading ov::Model model: {} version: {}", e.what(), getName(), getVersion()); return StatusCode::INTERNAL_ERROR; @@ -658,11 +666,11 @@ void ModelInstance::loadCompiledModelPtr(const plugin_config_t& pluginConfig) { plugin_config_t ModelInstance::prepareDefaultPluginConfig(const ModelConfig& config) { plugin_config_t pluginConfig = config.getPluginConfig(); - // By default, set "PERFORMANCE_HINT" = "THROUGHPUT"; + // By default, set "PERFORMANCE_HINT" = "LATENCY"; if ((pluginConfig.count("NUM_STREAMS") == 1) || (pluginConfig.count("PERFORMANCE_HINT") == 1)) { return pluginConfig; } else { - 
pluginConfig["PERFORMANCE_HINT"] = "THROUGHPUT"; + pluginConfig["PERFORMANCE_HINT"] = "LATENCY"; } return pluginConfig; } @@ -741,6 +749,9 @@ void ModelInstance::fetchModelFiles(bool& found, ArrayType ext) { if (file.empty()) { found = false; } + if (endsWith(file, "saved_model.pb")) { + file = file.substr(0, file.find("saved_model.pb")); + } modelFiles.push_back(file); } } @@ -854,6 +865,12 @@ Status ModelInstance::loadModelImpl(const ModelConfig& config, const DynamicMode this->status.setLoading(ModelVersionStatusErrorCode::UNKNOWN); return StatusCode::MODEL_NOT_LOADED; } + try { + bool isModelLoadedFromCache = compiledModel->get_property(ov::loaded_from_cache); + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Is model loaded from cache: {}", isModelLoadedFromCache); + } catch (...) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Unable to get information if model was loaded from cache; model: {}; version: {}; device: {}", getName(), getVersion(), config.getTargetDevice()); + } this->status.setAvailable(); modelLoadedNotify.notify_all(); return status; @@ -1091,6 +1108,7 @@ void ModelInstance::unloadModelComponents() { customLoaderInterfacePtr->unloadModel(getName(), getVersion()); } } + malloc_trim(0); } const std::set& ModelInstance::getOptionalInputNames() { @@ -1207,7 +1225,7 @@ Status ModelInstance::infer(const RequestType* requestProto, timer.start(SERIALIZE); OutputGetter outputGetter(inferRequest); - status = serializePredictResponse(outputGetter, getName(), getVersion(), getOutputsInfo(), responseProto, getTensorInfoName); + status = serializePredictResponse(outputGetter, getName(), getVersion(), getOutputsInfo(), responseProto, getTensorInfoName, useSharedOutputContentFn(requestProto)); timer.stop(SERIALIZE); if (!status.ok()) return status; diff --git a/src/modelinstance.hpp b/src/modelinstance.hpp index ca7c641713..9972b61efb 100644 --- a/src/modelinstance.hpp +++ b/src/modelinstance.hpp @@ -26,8 +26,6 @@ #include -#include "inferencerequest.hpp" -#include "inferenceresponse.hpp" #include "kfs_frontend/kfs_grpc_inference_service.hpp" #include "model_metric_reporter.hpp" #include "modelchangesubscription.hpp" @@ -41,6 +39,8 @@ namespace ovms { class MetricRegistry; class ModelInstanceUnloadGuard; +class InferenceRequest; +class InferenceResponse; class PipelineDefinition; class Status; template @@ -232,6 +232,11 @@ class ModelInstance { const Status validate(const RequestType* request); private: + /** + * @brief Holds model required file names. First is loaded + */ + std::vector modelFiles; + /** * @brief Holds the information about inputs and it's parameters */ @@ -242,11 +247,6 @@ class ModelInstance { */ tensor_map_t outputsInfo; - /** - * @brief Holds model required file names. 
First is loaded - */ - std::vector modelFiles; - /** * @brief OpenVINO inference execution stream pool */ @@ -367,6 +367,14 @@ class ModelInstance { return path; } + /** + * @brief Gets model files' paths + * + * @return vector of paths + */ + const std::vector& getModelFiles() const { + return modelFiles; + } /** * @brief Gets version * @@ -437,6 +445,8 @@ class ModelInstance { return inputsInfo; } + virtual ov::AnyMap getRTInfo() const; + /** * @brief Get the Outputs Info object * diff --git a/src/modelmanager.cpp b/src/modelmanager.cpp index 9e1d6dc0ff..9972363834 100644 --- a/src/modelmanager.cpp +++ b/src/modelmanager.cpp @@ -52,6 +52,10 @@ #include "gcsfilesystem.hpp" #include "localfilesystem.hpp" #include "logging.hpp" +#if (MEDIAPIPE_DISABLE == 0) +#include "mediapipe_internal/mediapipefactory.hpp" +#include "mediapipe_internal/mediapipegraphdefinition.hpp" +#endif #include "metric_config.hpp" #include "metric_registry.hpp" #include "modelinstance.hpp" // for logging @@ -145,7 +149,10 @@ void ModelManager::logPluginConfiguration() { } } -ModelManager::~ModelManager() = default; +ModelManager::~ModelManager() { + join(); + models.clear(); +} Status ModelManager::start(const Config& config) { watcherIntervalSec = config.filesystemPollWaitSeconds(); @@ -210,6 +217,7 @@ Status ModelManager::startFromConfig() { return StatusCode::UNKNOWN_ERROR; } + this->setRootDirectoryPath(""); Status status = StatusCode::OK; // Reading metric config only once per server start @@ -270,6 +278,15 @@ Status ModelManager::startFromConfig() { modelConfig.setBatchSize(std::nullopt); } + modelConfig.setRootDirectoryPath(this->rootDirectoryPath); + + try { + modelConfig.setBasePath(modelConfig.getBasePath()); + } catch (std::logic_error& e) { + SPDLOG_DEBUG("{}: {}", status.string(), e.what()); + return StatusCode::INTERNAL_ERROR; + } + return reloadModelWithVersions(modelConfig); } @@ -354,6 +371,53 @@ static Status processCustomNodeConfig(const rapidjson::Value& nodeConfig, Custom return StatusCode::OK; } +#if (MEDIAPIPE_DISABLE == 0) +Status ModelManager::processMediapipeConfig(rapidjson::Document& configJson, const MediapipeGraphConfig& config, std::set& mediapipesInConfigFile, MediapipeFactory& factory) { + if (mediapipesInConfigFile.find(config.getGraphName()) != mediapipesInConfigFile.end()) { + SPDLOG_LOGGER_WARN(modelmanager_logger, "Duplicated mediapipe names: {} defined in config file. Only first graph will be loaded.", config.getGraphName()); + return StatusCode::OK; // TODO @atobiszei do we want to have OK? + } + if (!factory.definitionExists(config.getGraphName())) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Mediapipe graph:{} was not loaded so far. Triggering load", config.getGraphName()); + auto status = factory.createDefinition(config.getGraphName(), config, *this); + mediapipesInConfigFile.insert(config.getGraphName()); + return status; + } + SPDLOG_LOGGER_WARN(modelmanager_logger, "Mediapipe graph:{} is already loaded. 
Triggering reload", config.getGraphName()); + auto status = factory.reloadDefinition(config.getGraphName(), + config, + *this); + mediapipesInConfigFile.insert(config.getGraphName()); + return status; +} +#endif + +#if (MEDIAPIPE_DISABLE == 0) +static Status parseMediapipeConfig(rapidjson::Document& configJson, std::string& rootDirectoryPath, std::vector& mediapipesInConfigFile) { + const auto itrp = configJson.FindMember("mediapipe_config_list"); + if (itrp == configJson.MemberEnd() || !itrp->value.IsArray()) { + return StatusCode::OK; + } + try { + for (const auto& mediapipeGraphConfig : itrp->value.GetArray()) { + MediapipeGraphConfig config; + config.setRootDirectoryPath(rootDirectoryPath); + auto status = config.parseNode(mediapipeGraphConfig); + if (status != StatusCode::OK) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Parsing graph config failed"); + return status; + } + mediapipesInConfigFile.push_back(config); + } + } catch (const std::exception& e) { + SPDLOG_ERROR("Failed to process mediapipe graph config:{}", e.what()); + } catch (...) { + SPDLOG_ERROR("Failed to process mediapipe graph config."); + } + return StatusCode::OK; +} +#endif + static Status processPipelineConfig(rapidjson::Document& configJson, const rapidjson::Value& pipelineConfig, std::set& pipelinesInConfigFile, PipelineFactory& factory, ModelManager& manager) { const std::string pipelineName = pipelineConfig["name"].GetString(); if (pipelinesInConfigFile.find(pipelineName) != pipelinesInConfigFile.end()) { @@ -483,12 +547,38 @@ Status ModelManager::loadCustomNodeLibrariesConfig(rapidjson::Document& configJs librariesInConfig.emplace(libraryConfig.FindMember("name")->value.GetString()); this->customNodeLibraryManager->loadLibrary( libraryConfig.FindMember("name")->value.GetString(), - libraryConfig.FindMember("base_path")->value.GetString()); + this->getFullPath(libraryConfig.FindMember("base_path")->value.GetString())); } this->customNodeLibraryManager->unloadLibrariesRemovedFromConfig(librariesInConfig); return StatusCode::OK; } +#if (MEDIAPIPE_DISABLE == 0) +Status ModelManager::loadMediapipeGraphsConfig(rapidjson::Document& configJson, std::vector& mediapipesInConfigFile) { + if (mediapipesInConfigFile.size() == 0) { + SPDLOG_LOGGER_INFO(modelmanager_logger, "Configuration file doesn't have mediapipe property."); + mediapipeFactory.retireOtherThan({}, *this); + return StatusCode::OK; + } + std::set mediapipesInConfigFileNames; + Status firstErrorStatus = StatusCode::OK; + try { + for (const auto& mediapipeGraphConfig : mediapipesInConfigFile) { + auto status = processMediapipeConfig(configJson, mediapipeGraphConfig, mediapipesInConfigFileNames, mediapipeFactory); + if (status != StatusCode::OK) { + IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); + } + } + mediapipeFactory.retireOtherThan(std::move(mediapipesInConfigFileNames), *this); + } catch (const std::exception& e) { + SPDLOG_ERROR("Failed to process mediapipe graph config:{}", e.what()); + } catch (...) 
{ + SPDLOG_ERROR("Failed to process mediapipe graph config."); + } + return firstErrorStatus; +} +#endif + Status ModelManager::loadPipelinesConfig(rapidjson::Document& configJson) { const auto itrp = configJson.FindMember("pipeline_config_list"); if (itrp == configJson.MemberEnd() || !itrp->value.IsArray()) { @@ -565,6 +655,7 @@ Status ModelManager::loadCustomLoadersConfig(rapidjson::Document& configJson) { SPDLOG_INFO("Reading Custom Loader: {} configuration", loaderName); CustomLoaderConfig loaderConfig; + loaderConfig.setRootDirectoryPath(this->rootDirectoryPath); auto status = loaderConfig.parseNode(configs["config"]); if (status != StatusCode::OK) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); @@ -604,22 +695,14 @@ Status ModelManager::loadMetricsConfig(rapidjson::Document& configJson) { } } -Status ModelManager::loadModelsConfig(rapidjson::Document& configJson, std::vector& gatedModelConfigs) { +Status ModelManager::loadModels(const rapidjson::Value::MemberIterator& modelsConfigList, std::vector& gatedModelConfigs, std::set& modelsInConfigFile, std::set& modelsWithInvalidConfig, std::unordered_map& newModelConfigs, const std::string& rootDirectoryPath) { Status firstErrorStatus = StatusCode::OK; - const auto itr = configJson.FindMember("model_config_list"); - - if (itr == configJson.MemberEnd() || !itr->value.IsArray()) { - SPDLOG_LOGGER_ERROR(modelmanager_logger, "Configuration file doesn't have models property."); - return StatusCode::JSON_INVALID; - } - - std::set modelsInConfigFile; - std::set modelsWithInvalidConfig; - std::unordered_map newModelConfigs; - for (const auto& configs : itr->value.GetArray()) { + for (const auto& configs : modelsConfigList->value.GetArray()) { ModelConfig modelConfig; + modelConfig.setRootDirectoryPath(rootDirectoryPath); auto status = modelConfig.parseNode(configs["config"]); + if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(StatusCode::MODEL_CONFIG_INVALID); SPDLOG_LOGGER_ERROR(modelmanager_logger, "Parsing model: {} config failed due to error: {}", modelConfig.getName(), status.string()); @@ -645,6 +728,7 @@ Status ModelManager::loadModelsConfig(rapidjson::Document& configJson, std::vect SPDLOG_LOGGER_WARN(modelmanager_logger, "Duplicated model names: {} defined in config file. 
Only first definition will be loaded.", modelName); continue; } + status = reloadModelWithVersions(modelConfig); IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); @@ -665,6 +749,74 @@ Status ModelManager::loadModelsConfig(rapidjson::Document& configJson, std::vect newModelConfigs.emplace(modelName, std::move(modelConfig)); } } + return firstErrorStatus; +} +#if (MEDIAPIPE_DISABLE == 0) +Status ModelManager::loadModelsConfig(rapidjson::Document& configJson, std::vector& gatedModelConfigs, std::vector& mediapipesInConfigFile) +#else +Status ModelManager::loadModelsConfig(rapidjson::Document& configJson, std::vector& gatedModelConfigs) +#endif +{ + Status firstErrorStatus = StatusCode::OK; + const auto itr = configJson.FindMember("model_config_list"); + + if (itr == configJson.MemberEnd() || !itr->value.IsArray()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Configuration file doesn't have models property."); + return StatusCode::JSON_INVALID; + } + std::set modelsInConfigFile; + std::set modelsWithInvalidConfig; + std::unordered_map newModelConfigs; + auto status = loadModels(itr, gatedModelConfigs, modelsInConfigFile, modelsWithInvalidConfig, newModelConfigs, this->rootDirectoryPath); + if (!status.ok()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Loading main OVMS config models failed."); + IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); + } + +#if (MEDIAPIPE_DISABLE == 0) + for (auto mediapipeConfig : mediapipesInConfigFile) { + std::string subconfigPath = mediapipeConfig.getSubconfigPath(); + if (subconfigPath.empty()) { + std::string defaultSubconfigPath = mediapipeConfig.getBasePath() + "subconfig.json"; + std::ifstream ifs(defaultSubconfigPath); + if (!ifs.is_open()) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "No subconfig path was provided for graph: {} and defualt subconfig file does not exist.", mediapipeConfig.getGraphName()); + continue; + } + subconfigPath = defaultSubconfigPath; + } + rapidjson::Document mediapipeConfigJson; + std::ifstream ifs(subconfigPath); + if (!ifs.is_open()) { + SPDLOG_LOGGER_DEBUG(modelmanager_logger, "Subconfig path: {} provided for graph: {} does not exist. Loading subconfig models will be skipped.", subconfigPath, subconfigPath); + continue; + } + rapidjson::IStreamWrapper isw(ifs); + rapidjson::ParseResult parseResult = configJson.ParseStream(isw); + if (!parseResult) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Mediapipe: {} configuration: {} file is not a valid JSON file. 
Error: {}", + mediapipeConfig.getGraphName(), subconfigPath, rapidjson::GetParseError_En(parseResult.Code())); + return StatusCode::JSON_INVALID; + } + if (validateJsonAgainstSchema(configJson, MEDIAPIPE_SUBCONFIG_SCHEMA.c_str()) != StatusCode::OK) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Mediapipe configuration file is not in valid configuration format"); + return StatusCode::JSON_INVALID; + } + const auto mediapipeItr = configJson.FindMember("model_config_list"); + + if (mediapipeItr == configJson.MemberEnd() || !mediapipeItr->value.IsArray()) { + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Configuration file doesn't have models property."); + return StatusCode::JSON_INVALID; + } + std::string subconfigRootDirectoryPath; + FileSystem::setRootDirectoryPath(subconfigRootDirectoryPath, subconfigPath); + status = loadModels(mediapipeItr, gatedModelConfigs, modelsInConfigFile, modelsWithInvalidConfig, newModelConfigs, subconfigRootDirectoryPath); + if (!status.ok()) { + IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Loading Mediapipe {} models from subconfig {} failed.", mediapipeConfig.getGraphName(), subconfigPath); + } + } +#endif this->servedModelConfigs = std::move(newModelConfigs); retireModelsRemovedFromConfigFile(modelsInConfigFile, modelsWithInvalidConfig); return firstErrorStatus; @@ -756,7 +908,7 @@ Status ModelManager::loadConfig(const std::string& jsonFilename) { return lastLoadConfigStatus; } - if (validateJsonAgainstSchema(configJson, MODELS_CONFIG_SCHEMA) != StatusCode::OK) { + if (validateJsonAgainstSchema(configJson, MODELS_CONFIG_SCHEMA.c_str()) != StatusCode::OK) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Configuration file is not in valid configuration format"); lastLoadConfigStatus = StatusCode::JSON_INVALID; return lastLoadConfigStatus; @@ -777,14 +929,28 @@ Status ModelManager::loadConfig(const std::string& jsonFilename) { Status firstErrorStatus = StatusCode::OK; + this->setRootDirectoryPath(jsonFilename); + // load the custom loader config, if available status = loadCustomLoadersConfig(configJson); if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } - + // handling mediapipe graph config is divided into two steps parsing and loading because + // before loading mediapipe graph we need first to load models from it's subconfig together with + // models from ovms config +#if (MEDIAPIPE_DISABLE == 0) + std::vector mediapipesInConfigFile; + status = parseMediapipeConfig(configJson, this->rootDirectoryPath, mediapipesInConfigFile); + if (!status.ok()) { + IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); + } + std::vector gatedModelConfigs; + status = loadModelsConfig(configJson, gatedModelConfigs, mediapipesInConfigFile); +#else std::vector gatedModelConfigs; status = loadModelsConfig(configJson, gatedModelConfigs); +#endif if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } @@ -796,6 +962,13 @@ Status ModelManager::loadConfig(const std::string& jsonFilename) { if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); } + +#if (MEDIAPIPE_DISABLE == 0) + status = loadMediapipeGraphsConfig(configJson, mediapipesInConfigFile); + if (!status.ok()) { + IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); + } +#endif status = tryReloadGatedModelConfigs(gatedModelConfigs); if (!status.ok()) { IF_ERROR_NOT_OCCURRED_EARLIER_THEN_SET_FIRST_ERROR(status); @@ -874,7 +1047,11 @@ std::string ModelManager::getConfigFileMD5() { 
ifs.close(); unsigned char result[MD5_DIGEST_LENGTH]; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" MD5((unsigned char*)str.c_str(), str.size(), result); +#pragma GCC diagnostic pop + std::string md5sum(reinterpret_cast(result), MD5_DIGEST_LENGTH); return (md5sum); } @@ -906,7 +1083,6 @@ Status ModelManager::configFileReloadNeeded(bool& isNeeded) { void ModelManager::watcher(std::future exitSignal, bool watchConfigFile) { SPDLOG_LOGGER_INFO(modelmanager_logger, "Started model manager thread"); - while (exitSignal.wait_for(std::chrono::seconds(watcherIntervalSec)) == std::future_status::timeout) { SPDLOG_LOGGER_TRACE(modelmanager_logger, "Models configuration and filesystem check cycle begin"); std::lock_guard loadingLock(configMtx); @@ -918,7 +1094,6 @@ void ModelManager::watcher(std::future exitSignal, bool watchConfigFile) { } } updateConfigurationWithoutConfigFile(); - SPDLOG_LOGGER_TRACE(modelmanager_logger, "Models configuration and filesystem check cycle end"); } SPDLOG_LOGGER_INFO(modelmanager_logger, "Stopped model manager thread"); @@ -981,10 +1156,12 @@ void ModelManager::cleanupResources() { } void ModelManager::join() { - if (watcherStarted) + if (watcherStarted) { exitTrigger.set_value(); - if (cleanerStarted) + } + if (cleanerStarted) { cleanerExitTrigger.set_value(); + } if (watcherStarted) { if (monitor.joinable()) { @@ -1113,23 +1290,38 @@ std::shared_ptr ModelManager::getModelIfExistCreateElse(const std:: } std::shared_ptr ModelManager::getFilesystem(const std::string& basePath) { - if (basePath.rfind(S3FileSystem::S3_URL_PREFIX, 0) == 0) { + if (basePath.rfind(FileSystem::S3_URL_PREFIX, 0) == 0) { Aws::SDKOptions options; Aws::InitAPI(options); return std::make_shared(options, basePath); } - if (basePath.rfind(GCSFileSystem::GCS_URL_PREFIX, 0) == 0) { + if (basePath.rfind(FileSystem::GCS_URL_PREFIX, 0) == 0) { return std::make_shared(); } - if (basePath.rfind(AzureFileSystem::AZURE_URL_FILE_PREFIX, 0) == 0) { + if (basePath.rfind(FileSystem::AZURE_URL_FILE_PREFIX, 0) == 0) { return std::make_shared(); } - if (basePath.rfind(AzureFileSystem::AZURE_URL_BLOB_PREFIX, 0) == 0) { + if (basePath.rfind(FileSystem::AZURE_URL_BLOB_PREFIX, 0) == 0) { return std::make_shared(); } return std::make_shared(); } +const std::string ModelManager::getFullPath(const std::string& pathToCheck) const { + if (!FileSystem::isLocalFilesystem(pathToCheck)) { + // Cloud filesystem + return pathToCheck; + } else if (pathToCheck.size() > 0 && pathToCheck.at(0) == '/') { + // Full path case + return pathToCheck; + } else { + // Relative path case + if (this->rootDirectoryPath.empty()) + SPDLOG_LOGGER_ERROR(modelmanager_logger, "Using relative path without setting configuration directory path."); + return this->rootDirectoryPath + pathToCheck; + } +} + Status ModelManager::readAvailableVersions(std::shared_ptr& fs, const std::string& base, model_versions_t& versions) { files_list_t dirs; @@ -1380,4 +1572,15 @@ const CustomNodeLibraryManager& ModelManager::getCustomNodeLibraryManager() cons return *customNodeLibraryManager; } +Status ModelManager::createPipeline(std::shared_ptr& graph, + const std::string& name, + const KFSRequest* request, + KFSResponse* response) { +#if (MEDIAPIPE_DISABLE == 0) + return this->mediapipeFactory.create(graph, name, request, response, *this); +#else + SPDLOG_ERROR("Mediapipe support was disabled during build process..."); + return StatusCode::INTERNAL_ERROR; +#endif +} } // namespace ovms diff --git a/src/modelmanager.hpp 
b/src/modelmanager.hpp index 4a60162134..60a83987d7 100644 --- a/src/modelmanager.hpp +++ b/src/modelmanager.hpp @@ -33,9 +33,11 @@ #include "dags/pipeline_factory.hpp" #include "global_sequences_viewer.hpp" +#if (MEDIAPIPE_DISABLE == 0) +#include "mediapipe_internal/mediapipefactory.hpp" +#endif #include "metric_config.hpp" #include "model.hpp" -#include "modelconfig.hpp" #include "status.hpp" namespace ovms { @@ -48,7 +50,9 @@ class CNLIMWrapper; class CustomLoaderConfig; class CustomNodeLibraryManager; class MetricRegistry; +class ModelConfig; class FileSystem; +class MediapipeGraphExecutor; struct FunctorSequenceCleaner; struct FunctorResourcesCleaner; /** @@ -76,7 +80,9 @@ class ModelManager { std::unique_ptr ieCore; PipelineFactory pipelineFactory; - +#if (MEDIAPIPE_DISABLE == 0) + MediapipeFactory mediapipeFactory; +#endif std::unique_ptr customNodeLibraryManager; std::vector> resources = {}; @@ -99,7 +105,14 @@ class ModelManager { Status cleanupModelTmpFiles(ModelConfig& config); Status reloadModelVersions(std::shared_ptr& model, std::shared_ptr& fs, ModelConfig& config, std::shared_ptr& versionsToReload, std::shared_ptr versionsFailed); Status addModelVersions(std::shared_ptr& model, std::shared_ptr& fs, ModelConfig& config, std::shared_ptr& versionsToStart, std::shared_ptr versionsFailed); + Status loadModels(const rapidjson::Value::MemberIterator& modelsConfigList, std::vector& gatedModelConfigs, std::set& modelsInConfigFile, std::set& modelsWithInvalidConfig, std::unordered_map& newModelConfigs, const std::string& rootDirectoryPath); +#if (MEDIAPIPE_DISABLE == 0) + Status processMediapipeConfig(rapidjson::Document& configJson, const MediapipeGraphConfig& config, std::set& mediapipesInConfigFile, MediapipeFactory& factory); + Status loadMediapipeGraphsConfig(rapidjson::Document& configJson, std::vector& mediapipesInConfigFile); + Status loadModelsConfig(rapidjson::Document& configJson, std::vector& gatedModelConfigs, std::vector& mediapipesInConfigFile); +#else Status loadModelsConfig(rapidjson::Document& configJson, std::vector& gatedModelConfigs); +#endif Status tryReloadGatedModelConfigs(std::vector& gatedModelConfigs); Status loadCustomNodeLibrariesConfig(rapidjson::Document& configJson); Status loadPipelinesConfig(rapidjson::Document& configJson); @@ -205,7 +218,38 @@ class ModelManager { MetricRegistry* metricRegistry; + /** + * @brief Json config directory path + * + */ + std::string rootDirectoryPath; + + /** + * @brief Set json config directory path + * + * @param configFileFullPath + */ + void setRootDirectoryPath(const std::string& configFileFullPath) { + FileSystem::setRootDirectoryPath(this->rootDirectoryPath, configFileFullPath); + } + public: + /** + * @brief Get the full path from relative or full path + * + * @return const std::string& + */ + const std::string getFullPath(const std::string& pathToCheck) const; + + /** + * @brief Get the config root path + * + * @return const std::string& + */ + const std::string getRootDirectoryPath() const { + return rootDirectoryPath; + } + /** * @brief Mutex for blocking concurrent add & find of model */ @@ -266,6 +310,12 @@ class ModelManager { return pipelineFactory; } +#if (MEDIAPIPE_DISABLE == 0) + const MediapipeFactory& getMediapipeFactory() const { + return mediapipeFactory; + } +#endif + const CustomNodeLibraryManager& getCustomNodeLibraryManager() const; /** @@ -312,11 +362,15 @@ class ModelManager { template Status createPipeline(std::unique_ptr& pipeline, - const std::string name, + const std::string& name, const 
RequestType* request, ResponseType* response) { return pipelineFactory.create(pipeline, name, request, response, *this); } + Status createPipeline(std::shared_ptr& graph, + const std::string& name, + const KFSRequest* request, + KFSResponse* response); const bool pipelineDefinitionExists(const std::string& name) const { return pipelineFactory.definitionExists(name); diff --git a/src/ov_utils.cpp b/src/ov_utils.cpp index 9fd533c496..19c9c3a7a6 100644 --- a/src/ov_utils.cpp +++ b/src/ov_utils.cpp @@ -42,7 +42,7 @@ Status createSharedTensor(ov::Tensor& destinationTensor, ov::element::Type_t pre return StatusCode::OK; } -std::string getTensorMapString(const std::map>& inputsInfo) { +std::string getTensorMapString(const std::map>& inputsInfo) { std::stringstream stringStream; for (const auto& pair : inputsInfo) { const auto& name = pair.first; diff --git a/src/ov_utils.hpp b/src/ov_utils.hpp index 29aeda3cb3..6f5a070ec6 100644 --- a/src/ov_utils.hpp +++ b/src/ov_utils.hpp @@ -35,7 +35,7 @@ Status createSharedTensor(ov::Tensor& destinationTensor, ov::element::Type_t pre **/ ov::Tensor createSharedTensor(ov::element::Type_t precision, const shape_t& shape, void* data); -std::string getTensorMapString(const std::map>& tensorMap); +std::string getTensorMapString(const std::map>& tensorMap); Status tensorClone(ov::Tensor& destinationTensor, const ov::Tensor& sourceTensor); diff --git a/src/ovms.h b/src/ovms.h index 2fb7427efa..4d12959354 100644 --- a/src/ovms.h +++ b/src/ovms.h @@ -27,6 +27,17 @@ typedef struct OVMS_Status_ OVMS_Status; typedef struct OVMS_ServerSettings_ OVMS_ServerSettings; typedef struct OVMS_ModelsSettings_ OVMS_ModelsSettings; +typedef struct OVMS_ServableMetadata_ OVMS_ServableMetadata; + +#define OVMS_API_VERSION_MAJOR 0 +#define OVMS_API_VERSION_MINOR 3 + +// Function to retrieve OVMS API version. +// +// \param major Returns major version of OVMS API. Represents breaking, non-backward compatible API changes. +// \param minor Returns minor version of OVMS API. Represents non-breaking, backward compatible API changes. +OVMS_Status* OVMS_ApiVersion(uint32_t* major, uint32_t* minor); + // OVMS_DataType // // Tensor and parameter data types recognized by OVMS. @@ -323,7 +334,7 @@ OVMS_Status* OVMS_ServerStartFromConfigurationFile(OVMS_Server* server, // \param servableName The name of the servable to be used // \param servableVersion The version of the servable to be used // \return OVMS_Status object in case of failure -OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, uint32_t servableVersion); +OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, int64_t servableVersion); void OVMS_InferenceRequestDelete(OVMS_InferenceRequest* response); // Set the data of the input buffer. Ownership of data needs to be maintained during inference. @@ -334,7 +345,7 @@ void OVMS_InferenceRequestDelete(OVMS_InferenceRequest* response); // \param shape The shape of the input // \param dimCount The number of dimensions of the shape // \return OVMS_Status object in case of failure -OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* request, const char* inputName, OVMS_DataType datatype, const uint64_t* shape, uint32_t dimCount); +OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* request, const char* inputName, OVMS_DataType datatype, const int64_t* shape, size_t dimCount); // Set the data of the input buffer. 
Ownership of data needs to be maintained during inference. // @@ -400,7 +411,7 @@ OVMS_Status* OVMS_InferenceResponseGetOutputCount(OVMS_InferenceResponse* respon // \param bufferType The buffer type of the data // \param deviceId The device id of the data memory buffer // \return OVMS_Status object in case of failure -OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* response, uint32_t id, const char** name, OVMS_DataType* datatype, const uint64_t** shape, uint32_t* dimCount, const void** data, size_t* byteSize, OVMS_BufferType* bufferType, uint32_t* deviceId); +OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* response, uint32_t id, const char** name, OVMS_DataType* datatype, const int64_t** shape, size_t* dimCount, const void** data, size_t* byteSize, OVMS_BufferType* bufferType, uint32_t* deviceId); // Get the number of parameters in response. // @@ -425,11 +436,85 @@ void OVMS_InferenceResponseDelete(OVMS_InferenceResponse* response); // Execute synchronous inference. // +// \param server The server object // \param request The request object // \param response The respons object. In case of success, caller takes the ownership of the response // \return OVMS_Status object in case of failure OVMS_Status* OVMS_Inference(OVMS_Server* server, OVMS_InferenceRequest* request, OVMS_InferenceResponse** response); +// Get OVMS_ServableMetadata object +// +// Creates OVMS_ServableMetadata object describing inputs and outputs. +// Returned object needs to be deleted after use with OVMS_ServableMetadataDelete +// if call succeeded. +// +// \param server The server object +// \param servableName The name of the servable to be used +// \param servableVersion The version of the servable to be used +// \param metadata The metadata object to be created +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_GetServableMetadata(OVMS_Server* server, const char* servableName, int64_t servableVersion, OVMS_ServableMetadata** metadata); + +// Get the number of inputs of servable. +// +// \param metadata The metadata object +// \param count The parameter count to be set +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_ServableMetadataGetInputCount(OVMS_ServableMetadata* metadata, uint32_t* count); + +// Get the number of outputs of servable. +// +// \param metadata The metadata object +// \param count The parameter count to be set +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_ServableMetadataGetOutputCount(OVMS_ServableMetadata* metadata, uint32_t* count); + +// Get the metadata of servable input given the index +// +// The received shapeMin and shapeMax indicate whether the underlying servable accepts +// a shape range or fully dynamic shape. A value of -1 for both shapeMin and shapeMax +// for a specific dimension means that the servable accepts any value on that dimension. 
+// +// \param metadata The metadata object +// \param id The id of the input +// \param name The name of the input +// \param datatype The data type of the input +// \param dimCount The number of dimensions of the shape +// \param shapeMin The shape lower bounds of the input +// \param shapeMax The shape upper bounds of the input +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_ServableMetadataGetInput(OVMS_ServableMetadata* metadata, uint32_t id, const char** name, OVMS_DataType* datatype, size_t* dimCount, int64_t** shapeMinArray, int64_t** shapeMaxArray); + +// Get the metadata of servable output given the index +// +// The received shapeMin and shapeMax indicate whether the underlying servable accepts +// a shape range or fully dynamic shape. A value of -1 for both shapeMin and shapeMax +// for a specific dimension means that the servable accepts any value on that dimension. +// +// \param metadata The metadata object +// \param id The id of the output +// \param name The name of the output +// \param datatype The data type of the output +// \param dimCount The number of dimensions of the shape +// \param shapeMin The shape lower bounds of the output +// \param shapeMax The shape upper bounds of the output +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_ServableMetadataGetOutput(OVMS_ServableMetadata* metadata, uint32_t id, const char** name, OVMS_DataType* datatype, size_t* dimCount, int64_t** shapeMinArray, int64_t** shapeMaxArray); + + +// EXPERIMENTAL // TODO: consider declaring a specific type for the underlying ov::AnyMap +// Get the additional info about servable. +// +// \param metadata The metadata object +// \param info The ptr to the ov::AnyMap* +// \return OVMS_Status object in case of failure +OVMS_Status* OVMS_ServableMetadataGetInfo(OVMS_ServableMetadata* metadata, const void** info); + +// Deallocates a servable metadata object. 
+// +// \param metadata The metadata object +void OVMS_ServableMetadataDelete(OVMS_ServableMetadata* metadata); + #ifdef __cplusplus } #endif diff --git a/src/predict_request_validation_utils.cpp b/src/predict_request_validation_utils.cpp index 817f54638b..95b95269ce 100644 --- a/src/predict_request_validation_utils.cpp +++ b/src/predict_request_validation_utils.cpp @@ -18,6 +18,9 @@ #pragma GCC diagnostic ignored "-Wall" #include "tensorflow_serving/apis/prediction_service.grpc.pb.h" #pragma GCC diagnostic pop +#include +#include +#include #include #include @@ -61,7 +64,7 @@ dimension_value_t RequestShapeInfo::getDim(siz } template <> -dimension_value_t RequestShapeInfo::getDim(size_t i) { +dimension_value_t RequestShapeInfo::getDim(size_t i) { return tensor.getShape()[i]; } template <> @@ -73,7 +76,7 @@ size_t RequestShapeInfo::getShapeSize() { return tensor.tensor_shape().dim_size(); } template <> -size_t RequestShapeInfo::getShapeSize() { +size_t RequestShapeInfo::getShapeSize() { return tensor.getShape().size(); } template <> @@ -85,7 +88,7 @@ const KFSShapeType& RequestShapeInfo::getShap return tensor.shape(); } template <> -const shape_t& RequestShapeInfo::getShape() { +const signed_shape_t& RequestShapeInfo::getShape() { return tensor.getShape(); } template @@ -126,12 +129,13 @@ class RequestValidator { Status checkIfShapeValuesNegative(const InputTensorType& proto) const; Status validateNumberOfBinaryInputShapeDimensions(const InputTensorType& proto) const; Status checkBatchSizeMismatch(const InputTensorType& proto, const Dimension& servableBatchSize, const size_t batchSizeIndex, Status& finalStatus, Mode batchingMode, Mode shapeMode) const; - Status checkBinaryBatchSizeMismatch(const InputTensorType& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode) const; + Status checkBinaryBatchSizeMismatch(const InputTensorType& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize) const; Status checkShapeMismatch(const InputTensorType& proto, const ovms::TensorInfo& inputInfo, const size_t batchSizeIndex, Status& finalStatus, Mode batchingMode, Mode shapeMode) const; Status validateTensorContent(const InputTensorType& proto, ovms::Precision expectedPrecision, size_t bufferId) const; Status validateNumberOfShapeDimensions(const ovms::TensorInfo& inputInfo, const InputTensorType& proto) const; + Status validateRawInputContentsFormatAndShape(const ovms::TensorInfo& inputInfo, const RequestType& request, const size_t& bufferId, Status& finalStatus, Mode batchingMode, Mode shapeMode) const; Status validatePrecision(const ovms::TensorInfo& inputInfo, const InputTensorType& proto) const; - bool checkIfNativeFileFormatUsed(const InputTensorType& proto, const std::string inputName) const; + Status checkStringShapeMismatch(const InputTensorType& proto, const ovms::TensorInfo& inputInfo, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize, size_t inputWidth) const; Status validateRequestCoherency() const; Status validate(); }; @@ -158,7 +162,7 @@ Status RequestValidator -Status RequestValidator::validateRequestCoherency() const { +Status RequestValidator::validateRequestCoherency() const { return StatusCode::OK; } @@ -194,7 +198,7 @@ const std::string& RequestValidator -const std::string& RequestValidator::getCurrentlyValidatedInputName() const { +const std::string& RequestValidator::getCurrentlyValidatedInputName() const { return *currentlyValidatedName; } @@ 
-207,7 +211,7 @@ const TFSInputTensorType& RequestValidatorsecond; } template <> -const InferenceTensor& RequestValidator::getInputFromIt(const InferenceTensor* const& it) const { +const InferenceTensor& RequestValidator::getInputFromIt(const InferenceTensor* const& it) const { return *it; } @@ -228,7 +232,7 @@ Status RequestValidator -Status RequestValidator::validateNumberOfInputs() const { +Status RequestValidator::validateNumberOfInputs() const { size_t expectedNumberOfInputs = inputsInfo.size(); if (request.getInputsSize() > 0 && expectedNumberOfInputs == static_cast(request.getInputsSize())) { return StatusCode::OK; @@ -278,7 +282,7 @@ Status RequestValidator -Status RequestValidator::validateAndGetInput(const InferenceRequest& request, const std::string& name, const InferenceTensor*& it, size_t& bufferId) { +Status RequestValidator::validateAndGetInput(const InferenceRequest& request, const std::string& name, const InferenceTensor*& it, size_t& bufferId) { if (request.getInput(name.c_str(), &it) != StatusCode::NONEXISTENT_TENSOR) { currentlyValidatedName = &name; return StatusCode::OK; @@ -320,12 +324,33 @@ Status RequestValidator std::numeric_limits::max() / inputBatchSize) { + return StatusCode::INVALID_STRING_MAX_SIZE_EXCEEDED; + } + size_t expectedTensorSize = inputBatchSize * inputWidth; + if (expectedTensorSize > MAX_2D_STRING_ARRAY_SIZE) { + std::stringstream ss; + ss << "; actual " << expectedTensorSize / (1024 * 1024) << "MB (max 1GB)"; + const std::string details = ss.str(); + SPDLOG_DEBUG(details); + return Status(StatusCode::INVALID_STRING_MAX_SIZE_EXCEEDED, details); + } + return StatusCode::OK; +} + template <> Status RequestValidator::validateNumberOfBinaryInputShapeDimensions(const KFSTensorInputProto& proto) const { RequestShapeInfo rsi(proto); if (rsi.getShapeSize() != 1) { std::stringstream ss; - ss << "Expected number of binary input shape dimensions: 1; Actual: " << rsi.getShapeSize() << "; input name: " << getCurrentlyValidatedInputName(); + ss << "Expected number of input shape dimensions: 1; Actual: " << rsi.getShapeSize() << "; input name: " << getCurrentlyValidatedInputName(); const std::string details = ss.str(); SPDLOG_DEBUG("[servable name: {} version: {}] Invalid number of shape dimensions - {}", servableName, servableVersion, details); return Status(StatusCode::INVALID_NO_OF_SHAPE_DIMENSIONS, details); @@ -333,11 +358,11 @@ Status RequestValidator -Status RequestValidator::validateNumberOfBinaryInputShapeDimensions(const InferenceTensor& tensor) const { - RequestShapeInfo rsi(tensor); +Status RequestValidator::validateNumberOfBinaryInputShapeDimensions(const InferenceTensor& tensor) const { + RequestShapeInfo rsi(tensor); if (rsi.getShapeSize() != 1) { std::stringstream ss; - ss << "Expected number of binary input shape dimensions: 1; Actual: " << rsi.getShapeSize() << "; input name: " << getCurrentlyValidatedInputName(); + ss << "Expected number of input shape dimensions: 1; Actual: " << rsi.getShapeSize() << "; input name: " << getCurrentlyValidatedInputName(); const std::string details = ss.str(); SPDLOG_DEBUG("[servable name: {} version: {}] Invalid number of shape dimensions - {}", servableName, servableVersion, details); return Status(StatusCode::INVALID_NO_OF_SHAPE_DIMENSIONS, details); @@ -365,15 +390,19 @@ Status RequestValidator -Status RequestValidator::checkBinaryBatchSizeMismatch(const TFSInputTensorType& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode) const { +Status 
RequestValidator::checkBinaryBatchSizeMismatch(const TFSInputTensorType& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize) const { RequestShapeInfo rsi(proto); - if (proto.string_val_size() <= 0) { + if (inputBatchSize <= 0) { std::stringstream ss; ss << "Batch size must be positive; input name: " << getCurrentlyValidatedInputName(); const std::string details = ss.str(); SPDLOG_DEBUG("[servable name: {} version: {}] Invalid batch size - {}", servableName, servableVersion, details); return Status(StatusCode::INVALID_BATCH_SIZE, details); } + if (rsi.getDim(0) != inputBatchSize) { + SPDLOG_DEBUG("[servable name: {} version: {}] Batch size in request {} does not match actual {}", servableName, servableVersion, rsi.getDim(0), inputBatchSize); + return StatusCode::INVALID_BATCH_SIZE; + } if (servableBatchSize.match(rsi.getDim(0))) { return StatusCode::OK; } @@ -390,15 +419,19 @@ Status RequestValidator -Status RequestValidator::checkBinaryBatchSizeMismatch(const KFSTensorInputProto& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode) const { +Status RequestValidator::checkBinaryBatchSizeMismatch(const KFSTensorInputProto& proto, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize) const { RequestShapeInfo rsi(proto); - if (proto.contents().bytes_contents_size() <= 0) { + if (inputBatchSize <= 0) { std::stringstream ss; ss << "Batch size must be positive; input name: " << getCurrentlyValidatedInputName(); const std::string details = ss.str(); SPDLOG_DEBUG("[servable name: {} version: {}] Invalid batch size - {}", servableName, servableVersion, details); return Status(StatusCode::INVALID_BATCH_SIZE, details); } + if (rsi.getDim(0) != inputBatchSize) { + SPDLOG_DEBUG("[servable name: {} version: {}] Batch size in request {} does not match actual {}", servableName, servableVersion, rsi.getDim(0), inputBatchSize); + return StatusCode::INVALID_BATCH_SIZE; + } if (servableBatchSize.match(rsi.getDim(0))) { return StatusCode::OK; } @@ -415,8 +448,8 @@ Status RequestValidator -Status RequestValidator::checkBinaryBatchSizeMismatch(const InferenceTensor& tensor, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode) const { - RequestShapeInfo rsi(tensor); +Status RequestValidator::checkBinaryBatchSizeMismatch(const InferenceTensor& tensor, const Dimension& servableBatchSize, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize) const { + RequestShapeInfo rsi(tensor); if (rsi.getDim(0) <= 0) { std::stringstream ss; ss << "Batch size must be positive; input name: " << getCurrentlyValidatedInputName(); @@ -484,6 +517,73 @@ Status RequestValidator:: return StatusCode::OK; } +static size_t getStringInputWidth(const KFSTensorInputProto& src) { + size_t maxStringLength = 0; + for (const auto& str : src.contents().bytes_contents()) { + maxStringLength = std::max(maxStringLength, str.size()); + } + return maxStringLength + 1; +} + +static size_t getStringInputWidth(const tensorflow::TensorProto& src) { + size_t maxStringLength = 0; + for (const auto& str : src.string_val()) { + maxStringLength = std::max(maxStringLength, str.size()); + } + return maxStringLength + 1; +} + +static size_t getStringInputWidth(const InferenceTensor& src) { + return 0; +} + +static int64_t getStringBatchSize(const KFSTensorInputProto& src) { + return src.contents().bytes_contents_size(); +} + 
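
Note: the string-input helpers around this point feed the later checks (validateAgainstMax2DStringArraySize, checkBinaryBatchSizeMismatch, checkStringShapeMismatch) with two values: the batch size is the number of strings carried by the request, and the width is the longest string plus one byte for the null terminator. Below is a minimal standalone sketch of that arithmetic using invented sample data; the names and values are illustrative only and not part of this change.

#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// Width of the 2D U8 string tensor: longest string plus one byte for the null terminator.
static size_t stringInputWidth(const std::vector<std::string>& strings) {
    size_t maxStringLength = 0;
    for (const auto& s : strings)
        maxStringLength = std::max(maxStringLength, s.size());
    return maxStringLength + 1;
}

int main() {
    std::vector<std::string> batch{"cat", "giraffe", ""};  // hypothetical request payload
    size_t inputWidth = stringInputWidth(batch);           // 8 == strlen("giraffe") + 1
    size_t inputBatchSize = batch.size();                  // 3
    // The validators then compare inputBatchSize * inputWidth against the
    // MAX_2D_STRING_ARRAY_SIZE cap (1GB) and against the servable's [batch, width] shape.
    return inputBatchSize * inputWidth == 24 ? 0 : 1;
}

The real helpers differ per frontend (bytes_contents for KFS, string_val for TFS, length-prefixed raw_input_contents for the raw path); the sketch only shows the shared width/batch computation they all reduce to.
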
+static int64_t getStringBatchSize(const tensorflow::TensorProto& src) { + return src.string_val_size(); +} + +static int64_t getStringBatchSize(const InferenceTensor& src) { + return 0; +} + +// To be called only for already validated proto against models with two dimensions. +template +Status RequestValidator::checkStringShapeMismatch(const InputTensorType& proto, const ovms::TensorInfo& inputInfo, Status& finalStatus, Mode batchingMode, Mode shapeMode, int32_t inputBatchSize, size_t inputWidth) const { + const auto& shape = inputInfo.getShape(); + bool mismatch = false; + if (batchingMode == AUTO) { // Skip batch dimension + if (!shape[1].match(static_cast(inputWidth))) { + mismatch = true; + } + } else { // Do not skip batch dimension + if (!shape.match(ov::Shape{static_cast(inputBatchSize), inputWidth})) { + mismatch = true; + } + } + if (!mismatch) { + return StatusCode::OK; + } + if (shapeMode == AUTO) { + finalStatus = StatusCode::RESHAPE_REQUIRED; + return StatusCode::OK; + } else { + auto stringInputShape = Shape({static_cast(inputBatchSize), static_cast(inputWidth)}); + std::stringstream ss; + ss << "Expected batch size: " << shape[0].toString() + << "; got: " << inputBatchSize + << "; Expected max null terminated string length: " << shape[1].toString() + << "; got: " << inputWidth + << "; input name: " << getCurrentlyValidatedInputName(); + const std::string details = ss.str(); + SPDLOG_DEBUG("[servable name: {} version: {}] Invalid shape - {}", servableName, servableVersion, details); + return Status(StatusCode::INVALID_SHAPE, details); + } + return StatusCode::OK; +} + template Status RequestValidator::validateInferenceTensorBufferType(const InferenceTensor& it) const { const Buffer* buffer = it.getBuffer(); @@ -649,7 +749,7 @@ Status RequestValidator -Status RequestValidator::validateTensorContent(const InferenceTensor& tensor, ovms::Precision expectedPrecision, size_t bufferId) const { +Status RequestValidator::validateTensorContent(const InferenceTensor& tensor, ovms::Precision expectedPrecision, size_t bufferId) const { const Buffer* buffer = tensor.getBuffer(); if (nullptr == buffer) { std::stringstream ss; @@ -710,7 +810,7 @@ Status RequestValidator -Status RequestValidator::validateNumberOfShapeDimensions(const ovms::TensorInfo& inputInfo, const InferenceTensor& tensor) const { +Status RequestValidator::validateNumberOfShapeDimensions(const ovms::TensorInfo& inputInfo, const InferenceTensor& tensor) const { // Network and request must have the same number of shape dimensions, higher than 0 const auto& shape = inputInfo.getShape(); if (tensor.getShape().size() <= 0 || @@ -753,7 +853,7 @@ Status RequestValidator -Status RequestValidator::validatePrecision(const ovms::TensorInfo& inputInfo, const InferenceTensor& tensor) const { +Status RequestValidator::validatePrecision(const ovms::TensorInfo& inputInfo, const InferenceTensor& tensor) const { if (tensor.getDataType() != getPrecisionAsOVMSDataType(inputInfo.getPrecision())) { std::stringstream ss; ss << "Expected: " << inputInfo.getPrecisionAsString() @@ -780,61 +880,57 @@ static Mode getShapeMode(const shapes_info_map_t& shapeInfo, const std::string& return it->second.shapeMode; } -template <> -bool RequestValidator::checkIfNativeFileFormatUsed(const TFSInputTensorType& proto, const std::string inputName) const { - if (proto.dtype() == tensorflow::DataType::DT_STRING) { - SPDLOG_DEBUG("[servable name: {} version: {}] Received request containing binary input: name: {}; batch size: {}", servableName, servableVersion, 
inputName, proto.string_val_size()); - return true; - } +static bool dataInRawInputContents(const ovms::InferenceRequest& request) { return false; } -template <> -bool RequestValidator::checkIfNativeFileFormatUsed(const KFSTensorInputProto& proto, const std::string inputName) const { - if (proto.datatype() == "BYTES") { - SPDLOG_DEBUG("[servable name: {} version: {}] Received request containing binary input: name: {}; batch size: {}", servableName, servableVersion, inputName, proto.contents().bytes_contents_size()); - return true; - } + +static bool dataInRawInputContents(const TFSRequestType& request) { return false; } -template <> -bool RequestValidator::checkIfNativeFileFormatUsed(const InferenceTensor& tensor, const std::string inputName) const { - return false; + +static bool dataInRawInputContents(const KFSRequest& request) { + return request.raw_input_contents().size() > 0; } -static bool shouldValidateBinaryBatchSizeMismatch(const ovms::InferenceRequest& request) { - return true; +static const std::string* getRawInputContents(const ovms::InferenceRequest& request, size_t bufferId) { + SPDLOG_DEBUG("Raw input contents is not supported for C-API"); + throw std::runtime_error("Raw input contents used in C-API flow."); + return nullptr; } -static bool shouldValidateBinaryBatchSizeMismatch(const TFSRequestType& request) { - return true; +static const std::string* getRawInputContents(const TFSRequestType& request, size_t bufferId) { + SPDLOG_DEBUG("Raw input contents is not supported for TFS API"); + throw std::runtime_error("Raw input contents used in TFSflow."); + return nullptr; } -static bool shouldValidateBinaryBatchSizeMismatch(const KFSRequest& request) { - return request.raw_input_contents().size() <= 0; +static const std::string* getRawInputContents(const KFSRequest& request, size_t bufferId) { + return &(request.raw_input_contents().at(bufferId)); } +#define RETURN_IF_ERR(X) \ + { \ + auto status = (X); \ + if (!status.ok()) \ + return status; \ + } + template Status RequestValidator::validate() { Status finalStatus = StatusCode::OK; - auto status = validateNumberOfInputs(); - if (!status.ok()) - return status; - status = validateRequestCoherency(); - if (!status.ok()) - return status; + RETURN_IF_ERR(validateNumberOfInputs()); + RETURN_IF_ERR(validateRequestCoherency()); size_t bufferId = 0; for (const auto& [name, inputInfo] : inputsInfo) { - status = validateAndGetInput(request, name, it, bufferId); - if (!status.ok()) - return status; + RETURN_IF_ERR(validateAndGetInput(request, name, it, bufferId)); const auto& proto = getInputFromIt(it); - status = checkIfShapeValuesNegative(proto); - if (!status.ok()) - return status; + RETURN_IF_ERR(checkIfShapeValuesNegative(proto)); + + // Batch and mode retrieval for given input auto batchIndex = inputInfo->getLayout().getBatchIndex(); if (!batchIndex.has_value()) { SPDLOG_DEBUG("[servable name: {} version: {}] Missing batch index in input: {} layout: {}", @@ -848,38 +944,50 @@ Status RequestValidator:: } const Dimension& batchSize = inputInfo->getShape()[batchIndex.value()]; Mode shapeMode = getShapeMode(shapeInfo, name); - if (checkIfNativeFileFormatUsed(proto, name)) { - status = validateNumberOfBinaryInputShapeDimensions(proto); - if (!status.ok()) - return status; - - if (shouldValidateBinaryBatchSizeMismatch(request)) { - status = checkBinaryBatchSizeMismatch(proto, batchSize, finalStatus, batchingMode, shapeMode); - if (!status.ok()) - return status; - } else if (batchSize != 1) { - SPDLOG_DEBUG("When the image is placed in 
raw_inputs_contents batch size cannot be bigger than 1."); - return StatusCode::INVALID_BATCH_SIZE; - } - continue; + if (requiresPreProcessing(proto)) { + const auto processingHint = inputInfo->getPreProcessingHint(); + int32_t inputBatchSize = 0; + size_t inputWidth = 0; + if (dataInRawInputContents(request)) { + const std::string* buffer = getRawInputContents(request, bufferId); + RETURN_IF_ERR(getRawInputContentsBatchSizeAndWidth(*buffer, inputBatchSize, inputWidth)); + } else { + inputBatchSize = getStringBatchSize(proto); + inputWidth = getStringInputWidth(proto); + } + if (processingHint == TensorInfo::ProcessingHint::STRING_1D_U8) { + SPDLOG_DEBUG("[servable name: {} version: {}] Validating request containing 1D string input: name: {}; batch size: {}", + servableName, servableVersion, name, batchSize.toString()); + RETURN_IF_ERR(validateNumberOfBinaryInputShapeDimensions(proto)); + continue; + } else if (processingHint == TensorInfo::ProcessingHint::STRING_2D_U8) { + SPDLOG_DEBUG("[servable name: {} version: {}] Validating request containing 2D string input: name: {}; batch size: {}", + servableName, servableVersion, name, batchSize.toString()); + RETURN_IF_ERR(validateNumberOfBinaryInputShapeDimensions(proto)); + RETURN_IF_ERR(validateAgainstMax2DStringArraySize(inputBatchSize, inputWidth)); + RETURN_IF_ERR(checkBinaryBatchSizeMismatch(proto, batchSize, finalStatus, batchingMode, shapeMode, inputBatchSize)); + RETURN_IF_ERR(checkStringShapeMismatch(proto, *inputInfo, finalStatus, batchingMode, shapeMode, inputBatchSize, inputWidth)); + continue; + } else if (processingHint == TensorInfo::ProcessingHint::IMAGE) { + SPDLOG_DEBUG("[servable name: {} version: {}] Validating request containing binary image input: name: {}; batch size: {}", + servableName, servableVersion, name, batchSize.toString()); + RETURN_IF_ERR(validateNumberOfBinaryInputShapeDimensions(proto)); + RETURN_IF_ERR(checkBinaryBatchSizeMismatch(proto, batchSize, finalStatus, batchingMode, shapeMode, inputBatchSize)); + continue; + } else { + SPDLOG_DEBUG("Request input: {} requires conversion but endpoint specifies no processing hint. 
Number of dimensions: {}; precision: {}; demultiplexer: {}", + name, inputInfo->getShape().size(), toString(inputInfo->getPrecision()), inputInfo->isInfluencedByDemultiplexer()); + return StatusCode::NOT_IMPLEMENTED; + } } - status = validatePrecision(*inputInfo, proto); - if (!status.ok()) - return status; - status = validateNumberOfShapeDimensions(*inputInfo, proto); - if (!status.ok()) - return status; - status = checkBatchSizeMismatch(proto, batchSize, batchIndex.value(), finalStatus, batchingMode, shapeMode); - if (!status.ok()) - return status; - status = checkShapeMismatch(proto, *inputInfo, batchIndex.value(), finalStatus, batchingMode, shapeMode); - if (!status.ok()) - return status; - status = validateTensorContent(proto, inputInfo->getPrecision(), bufferId); - if (!status.ok()) - return status; + // Data Array Proto + RETURN_IF_ERR(validatePrecision(*inputInfo, proto)); + RETURN_IF_ERR(validateNumberOfShapeDimensions(*inputInfo, proto)); + RETURN_IF_ERR(checkBatchSizeMismatch(proto, batchSize, batchIndex.value(), finalStatus, batchingMode, shapeMode)); + RETURN_IF_ERR(checkShapeMismatch(proto, *inputInfo, batchIndex.value(), finalStatus, batchingMode, shapeMode)); + RETURN_IF_ERR(validateTensorContent(proto, inputInfo->getPrecision(), bufferId)); } return finalStatus; } @@ -899,7 +1007,7 @@ Status validate(const KFSRequest& request, const tensor_map_t& inputsInfo, const template <> Status validate(const InferenceRequest& request, const tensor_map_t& inputsInfo, const std::string& servableName, const model_version_t servableVersion, const std::set& optionalAllowedInputNames, const Mode batchingMode, const shapes_info_map_t& shapeInfo) { OVMS_PROFILE_FUNCTION(); - return RequestValidator(request, inputsInfo, servableName, servableVersion, optionalAllowedInputNames, batchingMode, shapeInfo).validate(); + return RequestValidator(request, inputsInfo, servableName, servableVersion, optionalAllowedInputNames, batchingMode, shapeInfo).validate(); } } // namespace request_validation_utils } // namespace ovms diff --git a/src/prediction_service.cpp b/src/prediction_service.cpp index bd4aa32b72..f79c95006e 100644 --- a/src/prediction_service.cpp +++ b/src/prediction_service.cpp @@ -106,6 +106,7 @@ grpc::Status ovms::PredictionServiceImpl::Predict( SPDLOG_DEBUG("Requested model: {} does not exist. 
Searching for pipeline with that name...", request->model_spec().name()); status = getPipeline(request, response, pipelinePtr); } + if (!status.ok()) { if (modelInstance) { INCREMENT_IF_ENABLED(modelInstance->getMetricReporter().requestFailGrpcPredict); diff --git a/src/prediction_service_utils.cpp b/src/prediction_service_utils.cpp index 1e9d3fe259..e6a7b47cdf 100644 --- a/src/prediction_service_utils.cpp +++ b/src/prediction_service_utils.cpp @@ -115,9 +115,18 @@ std::map getRequestShapes(const InferenceRequest* request) return request->getRequestShapes(); } -template <> -bool useSharedOutputContent(const ::inference::ModelInferRequest* request) { - return request->raw_input_contents().size() > 0; +bool useSharedOutputContentFn(const tensorflow::serving::PredictRequest* request) { + // does not apply for TFS frontend + return false; +} + +bool useSharedOutputContentFn(const ::KFSRequest* request) { + return true; +} + +bool useSharedOutputContentFn(const InferenceRequest* request) { + // does not apply for C-API frontend + return false; } } // namespace ovms diff --git a/src/prediction_service_utils.hpp b/src/prediction_service_utils.hpp index e39d848a6a..4cecc07948 100644 --- a/src/prediction_service_utils.hpp +++ b/src/prediction_service_utils.hpp @@ -43,8 +43,7 @@ std::map getRequestShapes(const InferenceRequest* request) * which informs how response should be formated. Therefore return value should not have an impact for * any other frontend. */ -template -bool useSharedOutputContent(const RequestType* request) { - return true; -} +bool useSharedOutputContentFn(const tensorflow::serving::PredictRequest* request); +bool useSharedOutputContentFn(const ::KFSRequest* request); +bool useSharedOutputContentFn(const InferenceRequest* request); } // namespace ovms diff --git a/src/rest_parser.cpp b/src/rest_parser.cpp index 3f71298f95..bb7e4c7802 100644 --- a/src/rest_parser.cpp +++ b/src/rest_parser.cpp @@ -18,6 +18,7 @@ #include #include +#include "precision.hpp" #include "rest_utils.hpp" #include "status.hpp" #include "tfs_frontend/tfs_utils.hpp" @@ -123,6 +124,12 @@ bool TFSRestParser::parseArray(rapidjson::Value& doc, int dim, tensorflow::Tenso } return true; } + if (doc.IsString() && tensorPrecisionMap[tensorName] == ovms::Precision::U8 && (proto.dtype() == tensorflow::DataType::DT_UINT8 || proto.dtype() == tensorflow::DataType::DT_STRING)) { + if (!addValue(proto, doc)) { + return false; + } + return true; + } if (!doc.IsArray()) { return false; } @@ -205,7 +212,7 @@ Status TFSRestParser::parseRowFormat(rapidjson::Value& node) { return StatusCode::REST_COULD_NOT_PARSE_INSTANCE; } } - } else if (node.GetArray()[0].IsArray() || node.GetArray()[0].IsNumber() || isBinary(node.GetArray()[0])) { + } else if (node.GetArray()[0].IsArray() || node.GetArray()[0].IsNumber() || isBinary(node.GetArray()[0]) || node.GetArray()[0].IsString()) { // no named format if (requestProto.inputs_size() != 1) { return StatusCode::REST_INPUT_NOT_PREALLOCATED; @@ -415,6 +422,11 @@ bool TFSRestParser::addValue(tensorflow::TensorProto& proto, const rapidjson::Va return false; } } + if (value.IsString() && (proto.dtype() == tensorflow::DataType::DT_UINT8 || proto.dtype() == tensorflow::DataType::DT_STRING)) { + proto.add_string_val(value.GetString(), strlen(value.GetString())); + proto.set_dtype(tensorflow::DataType::DT_STRING); + return true; + } if (!value.IsNumber()) { return false; @@ -582,8 +594,21 @@ Status KFSRestParser::parseData(rapidjson::Value& node, ::KFSRequest::InferInput } else if (input.datatype() 
== "BOOL") { HANDLE_VALUE(mutable_bool_contents, GetBool, IsBool) } else if (input.datatype() == "BYTES") { - SPDLOG_DEBUG("For REST datatype BYTES is supported only with binary data extension"); - return StatusCode::REST_COULD_NOT_PARSE_INPUT; + for (auto& value : node.GetArray()) { + if (value.IsArray()) { + auto status = parseData(value, input); + if (!status.ok()) { + return status; + } + continue; + } + if (value.IsString()) { + input.mutable_contents()->add_bytes_contents(value.GetString()); + } else { + SPDLOG_DEBUG("BYTES datatype used in REST request, but data contains non string JSON values"); + return StatusCode::REST_COULD_NOT_PARSE_INPUT; + } + } } else { return StatusCode::REST_UNSUPPORTED_PRECISION; } diff --git a/src/rest_utils.cpp b/src/rest_utils.cpp index a3b3864c01..8c28d215a8 100644 --- a/src/rest_utils.cpp +++ b/src/rest_utils.cpp @@ -180,6 +180,13 @@ Status makeJsonFromPredictResponse( tensor.add_uint64_val(*reinterpret_cast(tensor.mutable_tensor_content()->data() + i)); } break; + case DataType::DT_STRING: + if (seekDataInValField) { + auto status = checkValField(tensor.string_val_size(), tensor.tensor_shape().dim(0).size()); + if (!status.ok()) + return status; + } + break; default: return StatusCode::REST_UNSUPPORTED_PRECISION; } @@ -288,6 +295,43 @@ static void appendBinaryOutput(std::string& bytesOutputsBuffer, char* output, si } \ } +#define PARSE_OUTPUT_DATA_STRING(CONTENTS_FIELD, WRITER_TYPE) \ + expectedContentSize = 0; \ + if (seekDataInValField) { \ + if (binaryOutput) { \ + for (auto& sentence : tensor.contents().CONTENTS_FIELD()) { \ + uint32_t length = static_cast( \ + sentence.size()); \ + expectedContentSize += length + 4; \ + appendBinaryOutput(bytesOutputsBuffer, \ + (char*)&length, sizeof(length)); \ + appendBinaryOutput( \ + bytesOutputsBuffer, \ + (char*)sentence.data(), \ + length); \ + } \ + } else { \ + for (auto& sentence : tensor.contents().CONTENTS_FIELD()) { \ + writer.WRITER_TYPE(sentence.data(), sentence.size()); \ + } \ + } \ + } else { \ + if (binaryOutput) { \ + expectedContentSize += response_proto.raw_output_contents(tensor_it).size(); \ + appendBinaryOutput(bytesOutputsBuffer, (char*)response_proto.raw_output_contents(tensor_it).data(), response_proto.raw_output_contents(tensor_it).size()); \ + } else { \ + size_t i = 0; \ + while (i < response_proto.raw_output_contents(tensor_it).size()) { \ + uint32_t length = *(reinterpret_cast(response_proto.raw_output_contents(tensor_it).data() + i)); \ + i += sizeof(length); \ + if (i + length > response_proto.raw_output_contents(tensor_it).size()) \ + return StatusCode::INTERNAL_ERROR; \ + writer.WRITER_TYPE(response_proto.raw_output_contents(tensor_it).data() + i, length); \ + i += length; \ + } \ + } \ + } + static Status parseOutputs(const ::KFSResponse& response_proto, rapidjson::PrettyWriter& writer, std::string& bytesOutputsBuffer, const std::set& binaryOutputsNames) { writer.Key("outputs"); writer.StartArray(); @@ -298,13 +342,15 @@ static Status parseOutputs(const ::KFSResponse& response_proto, rapidjson::Prett int tensor_it = 0; for (const auto& tensor : response_proto.outputs()) { size_t dataTypeSize = KFSDataTypeSize(tensor.datatype()); + // expected size calculated for static types + // for BYTES the calculation is dynamic inside PARSE_OUTPUT_DATA_EX since all strings can be of different length size_t expectedContentSize = dataTypeSize; for (int i = 0; i < tensor.shape().size(); i++) { expectedContentSize *= tensor.shape().at(i); } size_t expectedElementsNumber = dataTypeSize > 0 
? expectedContentSize / dataTypeSize : 0; - if (!seekDataInValField && (response_proto.raw_output_contents(tensor_it).size() != expectedContentSize)) + if (!seekDataInValField && (tensor.datatype() != "BYTES" && response_proto.raw_output_contents(tensor_it).size() != expectedContentSize)) return StatusCode::REST_SERIALIZE_TENSOR_CONTENT_INVALID_SIZE; writer.StartObject(); writer.Key("name"); @@ -317,7 +363,7 @@ static Status parseOutputs(const ::KFSResponse& response_proto, rapidjson::Prett writer.EndArray(); writer.Key("datatype"); writer.String(tensor.datatype().c_str()); - bool binaryOutput = ((binaryOutputsNames.find(tensor.name().c_str()) != binaryOutputsNames.end()) || (tensor.datatype() == "BYTES")); + bool binaryOutput = ((binaryOutputsNames.find(tensor.name().c_str()) != binaryOutputsNames.end())); if (!binaryOutput) { writer.Key("data"); writer.StartArray(); @@ -343,20 +389,7 @@ static Status parseOutputs(const ::KFSResponse& response_proto, rapidjson::Prett } else if (tensor.datatype() == "UINT64") { PARSE_OUTPUT_DATA(uint64_contents, uint64_t, Uint64) } else if (tensor.datatype() == "BYTES") { - if (seekDataInValField) { - size_t bytesContentsSize = 0; - for (auto& bytesStr : tensor.contents().bytes_contents()) { - bytesContentsSize += bytesStr.size(); - } - auto status = checkValField(bytesContentsSize, expectedElementsNumber); - if (!status.ok()) - return status; - for (auto& bytesStr : tensor.contents().bytes_contents()) { - bytesOutputsBuffer.append(bytesStr); - } - } else { - appendBinaryOutput(bytesOutputsBuffer, (char*)response_proto.raw_output_contents(tensor_it).data(), expectedContentSize); - } + PARSE_OUTPUT_DATA_STRING(bytes_contents, String) } else { return StatusCode::REST_UNSUPPORTED_PRECISION; } @@ -428,10 +461,6 @@ Status makeJsonFromPredictResponse( } Status decodeBase64(std::string& bytes, std::string& decodedBytes) { - auto status = Status(absl::Base64Unescape(bytes, &decodedBytes) ? StatusCode::OK : StatusCode::REST_BASE64_DECODE_ERROR); - if (!status.ok()) { - return status; - } - return StatusCode::OK; + return absl::Base64Unescape(bytes, &decodedBytes) ? 
StatusCode::OK : StatusCode::REST_BASE64_DECODE_ERROR; } } // namespace ovms diff --git a/src/s3filesystem.cpp b/src/s3filesystem.cpp index 916891dfcf..810ba4e632 100644 --- a/src/s3filesystem.cpp +++ b/src/s3filesystem.cpp @@ -48,8 +48,6 @@ namespace ovms { namespace s3 = Aws::S3; namespace fs = std::filesystem; -const std::string S3FileSystem::S3_URL_PREFIX = "s3://"; - StatusCode S3FileSystem::parsePath(const std::string& path, std::string* bucket, std::string* object) { std::smatch sm; @@ -59,7 +57,7 @@ StatusCode S3FileSystem::parsePath(const std::string& path, std::string* bucket, } if (!std::regex_match(path, sm, s3_regex_)) { - int bucket_start = path.find(S3_URL_PREFIX) + S3_URL_PREFIX.size(); + int bucket_start = path.find(FileSystem::S3_URL_PREFIX) + FileSystem::S3_URL_PREFIX.size(); int bucket_end = path.find("/", bucket_start); if (bucket_end > bucket_start) { @@ -85,8 +83,8 @@ StatusCode S3FileSystem::parsePath(const std::string& path, std::string* bucket, S3FileSystem::S3FileSystem(const Aws::SDKOptions& options, const std::string& s3_path) : options_(options), - s3_regex_(S3_URL_PREFIX + "([0-9a-zA-Z-.]+):([0-9]+)/([0-9a-z.-]+)(((/" - "[0-9a-zA-Z.-_]+)*)?)"), + s3_regex_(FileSystem::S3_URL_PREFIX + "([0-9a-zA-Z-.]+):([0-9]+)/([0-9a-z.-]+)(((/" + "[0-9a-zA-Z.-_]+)*)?)"), proxy_regex_("^(https?)://(([^:]{1,128}):([^@]{1,256})@)?([^:/]{1,255})(:([0-9]{1,5}))?/?") { Aws::Client::ClientConfiguration config; Aws::Auth::AWSCredentials credentials; @@ -254,7 +252,7 @@ StatusCode S3FileSystem::getDirectoryContents(const std::string& path, std::set< if (status != StatusCode::OK) { return status; } - std::string true_path = S3_URL_PREFIX + bucket + '/' + dir_path; + std::string true_path = FileSystem::S3_URL_PREFIX + bucket + '/' + dir_path; // Capture the full path to facilitate content listing full_dir = appendSlash(dir_path); @@ -297,7 +295,7 @@ StatusCode S3FileSystem::getDirectorySubdirs(const std::string& path, std::setbegin(); iter != subdirs->end();) { bool is_dir; - status = isDirectory(joinPath({true_path, *iter}), &is_dir); + status = isDirectory(FileSystem::joinPath({true_path, *iter}), &is_dir); if (status != StatusCode::OK) { return status; } @@ -328,7 +326,7 @@ StatusCode S3FileSystem::getDirectoryFiles(const std::string& path, std::setbegin(); iter != files->end();) { bool is_dir; - status = isDirectory(joinPath({true_path, *iter}), &is_dir); + status = isDirectory(FileSystem::joinPath({true_path, *iter}), &is_dir); if (status != StatusCode::OK) { return status; } @@ -412,7 +410,7 @@ StatusCode S3FileSystem::downloadFileFolder(const std::string& path, const std:: host_port = sm[2]; bucket = sm[3]; object = sm[4]; - effective_path = S3_URL_PREFIX + bucket + object; + effective_path = FileSystem::S3_URL_PREFIX + bucket + object; } else { effective_path = path; } @@ -431,8 +429,8 @@ StatusCode S3FileSystem::downloadFileFolder(const std::string& path, const std:: for (auto iter = contents.begin(); iter != contents.end(); ++iter) { bool is_subdir; - std::string s3_fpath = joinPath({effective_path, *iter}); - std::string local_fpath = joinPath({local_path, *iter}); + std::string s3_fpath = FileSystem::joinPath({effective_path, *iter}); + std::string local_fpath = FileSystem::joinPath({local_path, *iter}); status = isDirectory(s3_fpath, &is_subdir); if (status != StatusCode::OK) { return status; @@ -452,7 +450,7 @@ StatusCode S3FileSystem::downloadFileFolder(const std::string& path, const std:: return s; } for (auto itr = subdir_files.begin(); itr != subdir_files.end(); 
++itr) { - files.insert(joinPath({s3_fpath, *itr})); + files.insert(FileSystem::joinPath({s3_fpath, *itr})); } } else { files.insert(s3_fpath); @@ -479,7 +477,7 @@ StatusCode S3FileSystem::downloadFileFolder(const std::string& path, const std:: auto& retrieved_file = get_object_outcome.GetResultWithOwnership().GetBody(); std::string s3_removed_path = (*iter).substr(effective_path.size()); - std::string local_file_path = joinPath({local_path, s3_removed_path}); + std::string local_file_path = FileSystem::joinPath({local_path, s3_removed_path}); std::ofstream output_file(local_file_path.c_str(), std::ios::binary); output_file << retrieved_file.rdbuf(); output_file.close(); diff --git a/src/s3filesystem.hpp b/src/s3filesystem.hpp index 56757c3ddf..650d06b906 100644 --- a/src/s3filesystem.hpp +++ b/src/s3filesystem.hpp @@ -124,8 +124,6 @@ class S3FileSystem : public FileSystem { */ StatusCode deleteFileFolder(const std::string& path) override; - static const std::string S3_URL_PREFIX; - private: /** * @brief diff --git a/src/schema.cpp b/src/schema.cpp index 887d9eea70..14f3f8723f 100644 --- a/src/schema.cpp +++ b/src/schema.cpp @@ -25,8 +25,83 @@ #include namespace ovms { -const char* MODELS_CONFIG_SCHEMA = R"({ - "definitions": { +const std::string MODEL_CONFIG_DEFINITION = R"( +"model_config": { + "type": "object", + "required": ["config"], + "properties": { + "config": { + "type": "object", + "required": ["name", "base_path"], + "properties": { + "name": { + "type": "string" + }, + "base_path": { + "type": "string" + }, + "batch_size": { + "type": ["integer", "string"], + "minimum": 0 + }, + "model_version_policy": { + "$ref": "#/definitions/model_version_policy" + }, + "shape": { + "$ref": "#/definitions/layout_shape_def" + }, + "layout": { + "$ref": "#/definitions/layout_shape_def" + }, + "nireq": { + "type": "integer", + "minimum": 0 + }, + "target_device": { + "type": "string" + }, + "allow_cache": { + "type": "boolean" + }, + "plugin_config": { + "type": "object", + "additionalProperties": {"anyOf": [ + {"type": "string"}, + {"type": "number"} + ]} + }, + "stateful": { + "type": "boolean" + }, + "idle_sequence_cleanup": { + "type": "boolean" + }, + "low_latency_transformation": { + "type": "boolean" + }, + "max_sequence_number": { + "type": "integer", + "minimum": 0 + }, + "custom_loader_options": { + "type": "object", + "required": ["loader_name"], + "properties": { + "loader_name": { + "type": "string" + } + }, + "minProperties": 1 + } + }, + "additionalProperties": false + }, + "additionalProperties": false +})"; + +const std::string MODELS_CONFIG_SCHEMA = R"({ + "definitions": {)" + MODEL_CONFIG_DEFINITION + + R"(}, "custom_loader_config": { "type": "object", "required": ["config"], @@ -112,79 +187,25 @@ const char* MODELS_CONFIG_SCHEMA = R"({ } ] }, - "model_config": { - "type": "object", - "required": ["config"], - "properties": { - "config": { - "type": "object", - "required": ["name", "base_path"], - "properties": { - "name": { - "type": "string" - }, - "base_path": { - "type": "string" - }, - "batch_size": { - "type": ["integer", "string"], - "minimum": 0 - }, - "model_version_policy": { - "$ref": "#/definitions/model_version_policy" - }, - "shape": { - "$ref": "#/definitions/layout_shape_def" - }, - "layout": { - "$ref": "#/definitions/layout_shape_def" - }, - "nireq": { - "type": "integer", - "minimum": 0 - }, - "target_device": { - "type": "string" - }, - "allow_cache": { - "type": "boolean" - }, - "plugin_config": { - "type": "object", - "additionalProperties": {"anyOf": 
[ - {"type": "string"}, - {"type": "number"} - ]} - }, - "stateful": { - "type": "boolean" - }, - "idle_sequence_cleanup": { - "type": "boolean" - }, - "low_latency_transformation": { - "type": "boolean" - }, - "max_sequence_number": { - "type": "integer", - "minimum": 0 - }, - "custom_loader_options": { - "type": "object", - "required": ["loader_name"], - "properties": { - "loader_name": { - "type": "string" - } - }, - "minProperties": 1 - } - }, - "additionalProperties": false - }, - "additionalProperties": false - } - }, + "mediapipe_config": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string" + }, + "base_path": { + "type": "string" + }, + "graph_path": { + "type": "string" + }, + "subconfig": { + "type": "string" + } + }, + "additionalProperties": false + }, "source_node_names": { "type": "object", "required": ["node_name", "data_item"], @@ -341,13 +362,21 @@ const char* MODELS_CONFIG_SCHEMA = R"({ "$ref": "#/definitions/model_config" } }, - "pipeline_config_list": { + "pipeline_config_list": { "type": "array", "items": { "$ref": "#/definitions/pipeline_config" } - }, - "custom_node_library_config_list": { + },)" + +#if (MEDIAPIPE_DISABLE == 0) + R"("mediapipe_config_list": { + "type": "array", + "items": { + "$ref": "#/definitions/mediapipe_config" + } + },)" + +#endif + R"("custom_node_library_config_list": { "type": "array", "items": { "$ref": "#/definitions/custom_node_library_config" @@ -397,6 +426,23 @@ const char* MODELS_MAPPING_SCHEMA = R"( "additionalProperties": false })"; +const std::string MEDIAPIPE_SUBCONFIG_SCHEMA = R"({ + "definitions": {)" + MODEL_CONFIG_DEFINITION + + R"(}, + "type": "object", + "required": ["model_config_list"], + "properties": { + "model_config_list": { + "type": "array", + "items": { + "$ref": "#/definitions/model_config" + } + } + }, + "additionalProperties": false +} +})"; + StatusCode validateJsonAgainstSchema(rapidjson::Document& json, const char* schema) { rapidjson::Document schemaJson; rapidjson::ParseResult parsingSucceeded = schemaJson.Parse(schema); diff --git a/src/schema.hpp b/src/schema.hpp index 3c61e9d8d2..12899c08f5 100644 --- a/src/schema.hpp +++ b/src/schema.hpp @@ -15,13 +15,16 @@ //***************************************************************************** #pragma once +#include + #include #include "status.hpp" namespace ovms { -extern const char* MODELS_CONFIG_SCHEMA; +extern const std::string MODELS_CONFIG_SCHEMA; extern const char* MODELS_MAPPING_SCHEMA; +extern const std::string MEDIAPIPE_SUBCONFIG_SCHEMA; StatusCode validateJsonAgainstSchema(rapidjson::Document& json, const char* schema); } // namespace ovms diff --git a/src/serialization.cpp b/src/serialization.cpp index 2c8435e77c..e918930650 100644 --- a/src/serialization.cpp +++ b/src/serialization.cpp @@ -17,13 +17,16 @@ #include "kfs_frontend/kfs_utils.hpp" #include "ov_utils.hpp" +#include "precision.hpp" +#include "status.hpp" +#include "tensor_conversion.hpp" #include "tfs_frontend/tfs_utils.hpp" namespace ovms { static Status serializePrecision( tensorflow::TensorProto& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); if (servableOutput->getOvPrecision() != tensor.get_element_type()) { @@ -62,7 +65,7 @@ static Status serializePrecision( static Status serializePrecision( ::KFSResponse::InferOutputTensor& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { 
OVMS_PROFILE_FUNCTION(); if (servableOutput->getOvPrecision() != tensor.get_element_type()) { @@ -72,6 +75,10 @@ static Status serializePrecision( tensor.get_element_type().get_type_name()); return StatusCode::INTERNAL_ERROR; } + if (servableOutput->getPrecision() == ovms::Precision::U8 && servableOutput->getPostProcessingHint() == TensorInfo::ProcessingHint::STRING_2D_U8) { + responseOutput.set_datatype("BYTES"); + return StatusCode::OK; + } switch (servableOutput->getPrecision()) { case ovms::Precision::FP64: case ovms::Precision::FP32: @@ -102,7 +109,7 @@ static Status serializePrecision( static Status serializeShape( tensorflow::TensorProto& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); responseOutput.mutable_tensor_shape()->Clear(); @@ -127,7 +134,7 @@ static Status serializeShape( static Status serializeShape( ::KFSResponse::InferOutputTensor& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); responseOutput.clear_shape(); @@ -138,6 +145,10 @@ static Status serializeShape( servableOutput->getName(), effectiveNetworkOutputShape.size(), actualTensorShape.size()); return StatusCode::INTERNAL_ERROR; } + if (servableOutput->getPostProcessingHint() == TensorInfo::ProcessingHint::STRING_2D_U8) { + responseOutput.add_shape(tensor.get_shape()[0]); + return StatusCode::OK; + } for (size_t i = 0; i < effectiveNetworkOutputShape.size(); ++i) { dimension_value_t dim = actualTensorShape[i]; if (!effectiveNetworkOutputShape[i].match(dim)) { @@ -159,6 +170,22 @@ static void serializeContent(std::string* content, ov::Tensor& tensor) { } } +static void serializeStringContent(std::string* content, ov::Tensor& tensor) { + OVMS_PROFILE_FUNCTION(); + // We only fill if the content is not already filled. + // It can be filled in gather exit node handler. 
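+    // Each batch item is appended as a 4-byte length followed by the string bytes
+    // (trailing zeros of the fixed-width 2D U8 tensor are trimmed with strnlen),
+    // matching the KFS BYTES convention for raw_output_contents.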
+ if (!content->empty()) { + return; + } + + size_t batchSize = tensor.get_shape()[0]; + size_t maxStringLen = tensor.get_shape()[1]; + for (size_t i = 0; i < batchSize; i++) { + uint32_t strLen = strnlen((char*)tensor.data() + i * maxStringLen, maxStringLen); + content->append(reinterpret_cast(&strLen), sizeof(strLen)); + content->append((char*)tensor.data() + i * maxStringLen, strLen); + } +} #define SERIALIZE_BY_DATATYPE(contents, datatype) \ for (size_t i = 0; i < tensor.get_byte_size(); i += sizeof(datatype)) { \ auto value = responseOutput.mutable_contents()->contents()->Add(); \ @@ -194,9 +221,12 @@ static void serializeContent(::inference::ModelInferResponse::InferOutputTensor& Status serializeTensorToTensorProto( tensorflow::TensorProto& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); + if (servableOutput->getPostProcessingHint() == TensorInfo::ProcessingHint::STRING_2D_U8) { + return convertOVTensor2DToStringResponse(tensor, responseOutput); + } auto status = serializePrecision(responseOutput, servableOutput, tensor); if (!status.ok()) { return status; @@ -212,7 +242,7 @@ Status serializeTensorToTensorProto( Status serializeTensorToTensorProtoRaw( ::inference::ModelInferResponse::InferOutputTensor& responseOutput, std::string* rawOutputContents, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); auto status = serializePrecision(responseOutput, servableOutput, tensor); @@ -223,15 +253,22 @@ Status serializeTensorToTensorProtoRaw( if (!status.ok()) { return status; } - serializeContent(rawOutputContents, tensor); + if (servableOutput->getPostProcessingHint() == TensorInfo::ProcessingHint::STRING_2D_U8) { + serializeStringContent(rawOutputContents, tensor); + } else { + serializeContent(rawOutputContents, tensor); + } return StatusCode::OK; } Status serializeTensorToTensorProto( ::KFSResponse::InferOutputTensor& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor) { OVMS_PROFILE_FUNCTION(); + if (servableOutput->getPostProcessingHint() == TensorInfo::ProcessingHint::STRING_2D_U8) { + return convertOVTensor2DToStringResponse(tensor, responseOutput); + } auto status = serializePrecision(responseOutput, servableOutput, tensor); if (!status.ok()) { return status; diff --git a/src/serialization.hpp b/src/serialization.hpp index ab7e11f701..5b99b6d6eb 100644 --- a/src/serialization.hpp +++ b/src/serialization.hpp @@ -61,23 +61,23 @@ class ProtoGetter { Status serializeTensorToTensorProto( tensorflow::TensorProto& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor); Status serializeTensorToTensorProto( ::KFSResponse::InferOutputTensor& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor); Status serializeTensorToTensorProtoRaw( ::inference::ModelInferResponse::InferOutputTensor& responseOutput, std::string* rawOutputContents, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor); Status serializeTensorToTensorProto( InferenceTensor& responseOutput, - const std::shared_ptr& servableOutput, + const std::shared_ptr& servableOutput, ov::Tensor& tensor); typedef const std::string& (*outputNameChooser_t)(const std::string&, const TensorInfo&); @@ -92,7 +92,7 @@ Status 
serializePredictResponse( const tensor_map_t& outputMap, tensorflow::serving::PredictResponse* response, outputNameChooser_t outputNameChooser, - bool useSharedOutputContent = true) { + bool useSharedOutputContent = true) { // does not apply for TFS frontend OVMS_PROFILE_FUNCTION(); Status status; ProtoGetter protoGetter(response); @@ -151,7 +151,8 @@ Status serializePredictResponse( model_version_t servableVersion, const tensor_map_t& outputMap, InferenceResponse* response, - outputNameChooser_t outputNameChooser) { + outputNameChooser_t outputNameChooser, + bool useSharedOutputContent = true) { // does not apply for C-API frontend OVMS_PROFILE_FUNCTION(); Status status; uint32_t outputId = 0; @@ -204,7 +205,7 @@ Status serializePredictResponse( status = response->addOutput( outputInfo->getMappedName(), getPrecisionAsOVMSDataType(actualPrecision), - tensor.get_shape().data(), + reinterpret_cast(tensor.get_shape().data()), tensor.get_shape().size()); if (status == StatusCode::DOUBLE_TENSOR_INSERT) { // DAG demultiplexer CAPI handling @@ -234,4 +235,5 @@ Status serializePredictResponse( } return StatusCode::OK; } + } // namespace ovms diff --git a/src/servablemanagermodule.cpp b/src/servablemanagermodule.cpp index 82b092567c..5ff57dce5b 100644 --- a/src/servablemanagermodule.cpp +++ b/src/servablemanagermodule.cpp @@ -38,7 +38,7 @@ ServableManagerModule::ServableManagerModule(ovms::Server& ovmsServer) { Status ServableManagerModule::start(const ovms::Config& config) { state = ModuleState::STARTED_INITIALIZE; SPDLOG_INFO("{} starting", SERVABLE_MANAGER_MODULE_NAME); - auto status = servableManager->start(config); + auto status = getServableManager().start(config); if (status.ok()) { state = ModuleState::INITIALIZED; SPDLOG_INFO("{} started", SERVABLE_MANAGER_MODULE_NAME); @@ -52,7 +52,7 @@ void ServableManagerModule::shutdown() { return; state = ModuleState::STARTED_SHUTDOWN; SPDLOG_INFO("{} shutting down", SERVABLE_MANAGER_MODULE_NAME); - servableManager->join(); + getServableManager().join(); state = ModuleState::SHUTDOWN; SPDLOG_INFO("{} shutdown", SERVABLE_MANAGER_MODULE_NAME); } diff --git a/src/servablemetadata.cpp b/src/servablemetadata.cpp new file mode 100644 index 0000000000..790a4f0e78 --- /dev/null +++ b/src/servablemetadata.cpp @@ -0,0 +1,48 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include "servablemetadata.hpp" + +namespace ovms { + +const ov::AnyMap ServableMetadata::EMPTY_RT_INFO; +ServableMetadata::ServableMetadata(const std::string& name, + model_version_t version, + const tensor_map_t& inputsInfo, + const tensor_map_t& outputsInfo, + const ov::AnyMap& anyMap) : + name(name), + version(version), + inputsInfo(inputsInfo), + outputsInfo(outputsInfo), + info(anyMap) { + for (auto& [key, tensorInfo] : this->inputsInfo) { + auto& inDimsMin = this->inDimMin[key]; + auto& inDimsMax = this->inDimMax[key]; + for (auto& dim : tensorInfo->getShape()) { + inDimsMin.emplace_back(dim.isAny() ? -1 : dim.getLowerBound()); + inDimsMax.emplace_back(dim.isAny() ? -1 : dim.getUpperBound()); + } + } + for (auto& [key, tensorInfo] : this->outputsInfo) { + auto& outDimsMin = this->outDimMin[key]; + auto& outDimsMax = this->outDimMax[key]; + for (auto& dim : tensorInfo->getShape()) { + outDimsMin.emplace_back(dim.isAny() ? -1 : dim.getLowerBound()); + outDimsMax.emplace_back(dim.isAny() ? -1 : dim.getUpperBound()); + } + } +} +} // namespace ovms diff --git a/src/servablemetadata.hpp b/src/servablemetadata.hpp new file mode 100644 index 0000000000..0d683888c7 --- /dev/null +++ b/src/servablemetadata.hpp @@ -0,0 +1,58 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#pragma once + +#include +#include +#include +#include + +#include "modelversion.hpp" +#include "tensorinfo.hpp" + +namespace ovms { +using capi_tensor_shapes_map_t = std::unordered_map>; + +class ServableMetadata { + static const ov::AnyMap EMPTY_RT_INFO; + const std::string name; + const model_version_t version; + tensor_map_t inputsInfo; + tensor_map_t outputsInfo; + capi_tensor_shapes_map_t inDimMin; + capi_tensor_shapes_map_t inDimMax; + capi_tensor_shapes_map_t outDimMin; + capi_tensor_shapes_map_t outDimMax; + // for now this returns ov::Model::get_rt_info("model_info") + ov::AnyMap info; + +public: + ServableMetadata(const std::string& name, + model_version_t version, + const tensor_map_t& inputsInfo, + const tensor_map_t& outputsInfo, + const ov::AnyMap& anyMap = EMPTY_RT_INFO); + const tensor_map_t& getInputsInfo() const { return inputsInfo; } + const tensor_map_t& getOutputsInfo() const { return outputsInfo; } + const capi_tensor_shapes_map_t& getInputDimsMin() const { return this->inDimMin; } + const capi_tensor_shapes_map_t& getInputDimsMax() const { return this->inDimMax; } + const capi_tensor_shapes_map_t& getOutputDimsMin() const { return this->outDimMin; } + const capi_tensor_shapes_map_t& getOutputDimsMax() const { return this->outDimMax; } + const ov::AnyMap& getInfo() const { + return this->info; + } +}; +} // namespace ovms diff --git a/src/server.cpp b/src/server.cpp index 1b48acfab2..c6f193879b 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -225,7 +225,9 @@ void Server::setShutdownRequest(int i) { shutdown_request = i; } -Server::~Server() = default; +Server::~Server() { + this->shutdownModules(); +} std::unique_ptr Server::createModule(const std::string& name) { #ifdef MTR_ENABLED @@ -361,8 +363,10 @@ int Server::start(int argc, char** argv) { // C-API Start Status Server::start(ServerSettingsImpl* serverSettings, ModelsSettingsImpl* modelsSettings) { try { - if (this->isLive()) + if (this->isLive()) { + SPDLOG_ERROR("Cannot start OVMS - server is already live"); return StatusCode::SERVER_ALREADY_STARTED; + } auto& config = ovms::Config::instance(); if (!config.parse(serverSettings, modelsSettings)) return StatusCode::OPTIONS_USAGE_ERROR; diff --git a/src/shape.hpp b/src/shape.hpp index 5f32bcd604..478e2ab9e2 100644 --- a/src/shape.hpp +++ b/src/shape.hpp @@ -15,6 +15,7 @@ //***************************************************************************** #pragma once +#include #include #include #include @@ -23,7 +24,6 @@ namespace ovms { class Status; - using dimension_value_t = std::int64_t; constexpr dimension_value_t DYNAMIC_DIMENSION = -1; @@ -33,6 +33,22 @@ constexpr char DIMENSION_RANGE_DELIMETER = ':'; enum Mode { FIXED, AUTO }; using shape_t = std::vector; +using signed_shape_t = std::vector; + +template +std::string shapeToString(const T& shape) { + std::ostringstream oss; + oss << "("; + size_t i = 0; + if (shape.size() > 0) { + for (; i < shape.size() - 1; i++) { + oss << shape[i] << ","; + } + oss << shape[i]; + } + oss << ")"; + return oss.str(); +} class Dimension { dimension_value_t minimum, maximum; @@ -68,7 +84,6 @@ class Dimension { bool isAny() const; std::optional createIntersection(const Dimension& other) const; -private: dimension_value_t getLowerBound() const; dimension_value_t getUpperBound() const; }; diff --git a/src/status.cpp b/src/status.cpp index bb8a49f5cd..109847c7ba 100644 --- a/src/status.cpp +++ b/src/status.cpp @@ -96,6 +96,9 @@ const 
std::unordered_map Status::statusMess {StatusCode::INVALID_SHAPE, "Invalid input shape"}, {StatusCode::INVALID_BUFFER_TYPE, "Invalid input buffer type"}, {StatusCode::INVALID_DEVICE_ID, "Invalid input buffer device id"}, + {StatusCode::INVALID_STRING_INPUT, "Invalid string input"}, + {StatusCode::INVALID_STRING_MAX_SIZE_EXCEEDED, "Maximum 2D array after string conversion exceeded 1GB"}, + {StatusCode::INVALID_INPUT_FORMAT, "Inputs inside buffer do not match expected format"}, {StatusCode::INVALID_PRECISION, "Invalid input precision"}, {StatusCode::INVALID_VALUE_COUNT, "Invalid number of values in tensor proto container"}, {StatusCode::INVALID_CONTENT_SIZE, "Invalid content size of tensor proto"}, @@ -187,6 +190,18 @@ const std::unordered_map Status::statusMess {StatusCode::PIPELINE_DEMULTIPLEXER_NO_RESULTS, "Pipeline execution aborted due to no content from custom node"}, {StatusCode::PIPELINE_INPUTS_AMBIGUOUS_METADATA, "Multiple nodes connected to the same pipeline input require different tensor metadata"}, + // Mediapipe + {StatusCode::MEDIAPIPE_GRAPH_START_ERROR, "Failed to start mediapipe graph"}, + {StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID, "Failed to read protobuf graph configuration file"}, + {StatusCode::MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, "Failed to initialize mediapipe graph"}, + {StatusCode::MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, "Failed to add mediapipe graph output stream"}, + {StatusCode::MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR, "Failed to close mediapipe graph input stream"}, + {StatusCode::MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, "Failed to add packet to mediapipe graph input stream"}, // TODO add http/gRPC conversions for retCodes of Mediapipe + {StatusCode::MEDIAPIPE_DEFINITION_NAME_MISSING, "Model with requested name is not found"}, + {StatusCode::MEDIAPIPE_EXECUTION_ERROR, "Mediapipe execution failed. 
MP status"}, + {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE, "Mediapipe is retired"}, + {StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET, "Mediapipe is not loaded yet"}, + // Storage errors // S3 {StatusCode::S3_BUCKET_NOT_FOUND, "S3 Bucket not found"}, @@ -267,22 +282,14 @@ const std::unordered_map Status::statusMess {StatusCode::DOUBLE_TENSOR_INSERT, "Cannot insert more than one tensor with the same name"}, {StatusCode::DOUBLE_PARAMETER_INSERT, "Cannot insert more than one parameter with the same name"}, {StatusCode::NONEXISTENT_BUFFER_FOR_REMOVAL, "Tried to remove nonexisting buffer"}, - {StatusCode::NONEXISTENT_DATA, "Tried to use nonexisting data"}, - {StatusCode::NONEXISTENT_STRING, "Tried to use nonexisting string"}, - {StatusCode::NONEXISTENT_NUMBER, "Tried to use nonexisting number"}, - {StatusCode::NONEXISTENT_SETTINGS, "Tried to use nonexisting settings"}, - {StatusCode::NONEXISTENT_PARAMETER_FOR_REMOVAL, "Tried to remove nonexisting parameter"}, - {StatusCode::NONEXISTENT_RESPONSE, "Tried to use nonexisting response"}, - {StatusCode::NONEXISTENT_REQUEST, "Tried to use nonexisting request"}, - {StatusCode::NONEXISTENT_SERVER, "Tried to use nonexisting server"}, - {StatusCode::NONEXISTENT_TABLE, "Tried to use nonexisting table"}, + {StatusCode::NONEXISTENT_PARAMETER, "Tried to use nonexisting parameter"}, {StatusCode::NONEXISTENT_TENSOR, "Tried to get nonexisting tensor"}, {StatusCode::NONEXISTENT_TENSOR_FOR_SET_BUFFER, "Tried to set buffer for nonexisting tensor"}, {StatusCode::NONEXISTENT_TENSOR_FOR_REMOVE_BUFFER, "Tried to remove buffer for nonexisting tensor"}, {StatusCode::NONEXISTENT_TENSOR_FOR_REMOVAL, "Tried to remove nonexisting tensor"}, - {StatusCode::NONEXISTENT_STATUS, "Tried to use nonexisting status"}, {StatusCode::NONEXISTENT_LOG_LEVEL, "Tried to use nonexisting log level"}, - {StatusCode::SERVER_NOT_READY_FOR_INFERENCE, "Server not in a state to perform inference"}, + {StatusCode::NONEXISTENT_PTR, "Tried to use nonexisting pointer"}, + {StatusCode::SERVER_NOT_READY, "Server is not ready"}, // Server Start errors {StatusCode::OPTIONS_USAGE_ERROR, "options validation error"}, diff --git a/src/status.hpp b/src/status.hpp index 206d0dd212..b6be4158cb 100644 --- a/src/status.hpp +++ b/src/status.hpp @@ -87,18 +87,21 @@ enum class StatusCode { MAX_SEQUENCE_NUMBER_REACHED, /*!< Model handles maximum number of sequences and will not accept new ones */ // Predict request validation - INVALID_NO_OF_INPUTS, /*!< Invalid number of inputs */ - INVALID_MISSING_INPUT, /*!< Missing one or more of inputs */ - INVALID_MISSING_OUTPUT, /*!< Missing one or more of outputs */ - INVALID_NO_OF_SHAPE_DIMENSIONS, /*!< Invalid number of shape dimensions */ - INVALID_BATCH_SIZE, /*!< Input batch size other than required */ - INVALID_SHAPE, /*!< Invalid shape dimension number or dimension value */ - INVALID_PRECISION, /*!< Invalid precision */ - INVALID_VALUE_COUNT, /*!< Invalid value count error status for uint16 and half float data types */ - INVALID_CONTENT_SIZE, /*!< Invalid content size error status for types using tensor_content() */ - INVALID_MESSAGE_STRUCTURE, /*!< Buffers can't be both in raw_input_content & input tensor content */ - INVALID_BUFFER_TYPE, /*!< Invalid buffer type */ - INVALID_DEVICE_ID, /*!< Invalid buffer device id */ + INVALID_NO_OF_INPUTS, /*!< Invalid number of inputs */ + INVALID_MISSING_INPUT, /*!< Missing one or more of inputs */ + INVALID_MISSING_OUTPUT, /*!< Missing one or more of outputs */ + INVALID_NO_OF_SHAPE_DIMENSIONS, /*!< Invalid number of 
shape dimensions */ + INVALID_BATCH_SIZE, /*!< Input batch size other than required */ + INVALID_SHAPE, /*!< Invalid shape dimension number or dimension value */ + INVALID_PRECISION, /*!< Invalid precision */ + INVALID_VALUE_COUNT, /*!< Invalid value count error status for uint16 and half float data types */ + INVALID_CONTENT_SIZE, /*!< Invalid content size error status for types using tensor_content() */ + INVALID_MESSAGE_STRUCTURE, /*!< Buffers can't be both in raw_input_content & input tensor content */ + INVALID_BUFFER_TYPE, /*!< Invalid buffer type */ + INVALID_DEVICE_ID, /*!< Invalid buffer device id */ + INVALID_STRING_INPUT, /*!< Invalid string input */ + INVALID_INPUT_FORMAT, /*!< Invalid format of the input inside buffer */ + INVALID_STRING_MAX_SIZE_EXCEEDED, /*!< Maximum 2D array after string conversion exceeded 1GB */ // Deserialization OV_UNSUPPORTED_DESERIALIZATION_PRECISION, /*!< Unsupported deserialization precision, theoretically should never be returned since ModelInstance::validation checks against model precision */ @@ -232,6 +235,18 @@ enum class StatusCode { PIPELINE_DEMULTIPLEXER_NO_RESULTS, PIPELINE_INPUTS_AMBIGUOUS_METADATA, + // Mediapipe + MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID, + MEDIAPIPE_GRAPH_INITIALIZATION_ERROR, + MEDIAPIPE_GRAPH_ADD_OUTPUT_STREAM_ERROR, + MEDIAPIPE_GRAPH_START_ERROR, + MEDIAPIPE_GRAPH_CLOSE_INPUT_STREAM_ERROR, + MEDIAPIPE_GRAPH_ADD_PACKET_INPUT_STREAM, + MEDIAPIPE_DEFINITION_NAME_MISSING, + MEDIAPIPE_EXECUTION_ERROR, + MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE, + MEDIAPIPE_DEFINITION_NOT_LOADED_YET, + // Custom Loader CUSTOM_LOADER_LIBRARY_INVALID, CUSTOM_LOADER_LIBRARY_LOAD_FAILED, @@ -278,22 +293,14 @@ enum class StatusCode { DOUBLE_TENSOR_INSERT, DOUBLE_PARAMETER_INSERT, NONEXISTENT_BUFFER_FOR_REMOVAL, - NONEXISTENT_DATA, - NONEXISTENT_STRING, - NONEXISTENT_NUMBER, - NONEXISTENT_SETTINGS, - NONEXISTENT_PARAMETER_FOR_REMOVAL, // rename to non existen parameter - NONEXISTENT_SERVER, - NONEXISTENT_RESPONSE, - NONEXISTENT_REQUEST, - NONEXISTENT_TABLE, + NONEXISTENT_PARAMETER, NONEXISTENT_TENSOR, NONEXISTENT_TENSOR_FOR_SET_BUFFER, NONEXISTENT_TENSOR_FOR_REMOVE_BUFFER, NONEXISTENT_TENSOR_FOR_REMOVAL, - NONEXISTENT_STATUS, NONEXISTENT_LOG_LEVEL, - SERVER_NOT_READY_FOR_INFERENCE, + NONEXISTENT_PTR, + SERVER_NOT_READY, // Server Start errors OPTIONS_USAGE_ERROR, @@ -315,13 +322,14 @@ class Status { ensureMessageAllocated(); *this->message += " - " + details; } + +public: void ensureMessageAllocated() { if (nullptr == message) { message = std::make_unique(); } } -public: Status(StatusCode code = StatusCode::OK) : code(code) { if (code == StatusCode::OK) { @@ -380,10 +388,8 @@ class Status { const std::string& string() const { return this->message ? 
*this->message : statusMessageMap.at(code); } - operator const std::string&() const { return this->string(); } }; - } // namespace ovms diff --git a/src/systeminfo.cpp b/src/systeminfo.cpp index b2a9d8fbf9..87007cade3 100644 --- a/src/systeminfo.cpp +++ b/src/systeminfo.cpp @@ -20,31 +20,14 @@ #include #include +#include + #include "logging.hpp" #include "status.hpp" -#include "systeminfo_impl.hpp" namespace ovms { -const char* CPUSET_FILENAME = "/sys/fs/cgroup/cpuset/cpuset.cpus"; uint16_t getCoreCount() { - std::ifstream fs; - uint16_t coreCount = 1; - auto status = getCPUSetFile(fs, CPUSET_FILENAME); - if (status.ok()) { - std::string cpusets; - fs >> cpusets; - status = getCoreCountImpl(cpusets, coreCount); - } - if (status.ok()) { - return coreCount; - } else { - SPDLOG_ERROR("Failed to read system core count from cpuset file. Falling back to std::thread::hardware_concurrency"); - auto hwConcurrency = std::thread::hardware_concurrency(); - if (hwConcurrency == 0) { - SPDLOG_ERROR("Failed to read core count number of system. Fallback to treating system as 1 core only"); - return 1; - } - return hwConcurrency; - } + // return parallel_get_num_threads(); + return std::thread::hardware_concurrency(); } } // namespace ovms diff --git a/src/systeminfo_impl.cpp b/src/systeminfo_impl.cpp deleted file mode 100644 index 0bb0e78902..0000000000 --- a/src/systeminfo_impl.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//***************************************************************************** -// Copyright 2023 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-//***************************************************************************** -#include "systeminfo_impl.hpp" - -#include - -#include "logging.hpp" -#include "status.hpp" -#include "stringutils.hpp" - -namespace ovms { -Status getCoreCountImpl(const std::string& cpusets, uint16_t& coreCount) { - auto sets = tokenize(cpusets, ','); - uint16_t totalCount = 0; - for (auto& set : sets) { - if (std::string::npos == set.find('-')) { - if (stou32(set).has_value()) { - totalCount += 1; - } else { - return StatusCode::FILESYSTEM_ERROR; - } - } else { - auto bounds = tokenize(set, '-'); - // this handles both single minus numbers as well as two range sets - if (bounds.size() != 2) { - return StatusCode::FILESYSTEM_ERROR; - } - auto rbound = stou32(bounds[1]); - if (!rbound.has_value()) { - return StatusCode::FILESYSTEM_ERROR; - } - auto lbound = stou32(bounds[0]); - if (!lbound.has_value()) { - return StatusCode::FILESYSTEM_ERROR; - } - if (rbound <= lbound) { - return StatusCode::FILESYSTEM_ERROR; - } - uint16_t setCount = rbound.value() - lbound.value() + 1; - totalCount += setCount; - } - } - coreCount = totalCount; - return StatusCode::OK; -} -Status getCPUSetFile(std::ifstream& ifs, const std::string& filename) { - ifs.open(filename, std::ios_base::in); - if (ifs.fail()) { - SPDLOG_ERROR("Failed to open file: {}", filename); - return StatusCode::FILESYSTEM_ERROR; - } - return StatusCode::OK; -} -} // namespace ovms diff --git a/src/binaryutils.cpp b/src/tensor_conversion.cpp similarity index 60% rename from src/binaryutils.cpp rename to src/tensor_conversion.cpp index 684d0adc38..2b0d2928f1 100644 --- a/src/binaryutils.cpp +++ b/src/tensor_conversion.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
//***************************************************************************** -#include "binaryutils.hpp" +#include "tensor_conversion.hpp" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" @@ -21,6 +21,7 @@ #include "tensorflow_serving/apis/prediction_service.grpc.pb.h" #pragma GCC diagnostic pop +#include #include #include #include @@ -28,10 +29,12 @@ #include +#include "kfs_frontend/kfs_utils.hpp" #include "logging.hpp" #include "opencv2/opencv.hpp" #include "profiler.hpp" #include "status.hpp" +#include "tfs_frontend/tfs_utils.hpp" #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" @@ -96,7 +99,7 @@ static Status convertPrecision(const cv::Mat& src, cv::Mat& dst, const ovms::Pre return StatusCode::OK; } -static Status validateLayout(const std::shared_ptr& tensorInfo) { +static Status validateLayout(const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); static const std::string binarySupportedLayout = "N...HWC"; if (!tensorInfo->getLayout().createIntersection(Layout(binarySupportedLayout), tensorInfo->getShape().size()).has_value()) { @@ -121,7 +124,7 @@ static Status resizeMat(const cv::Mat& src, cv::Mat& dst, const dimension_value_ return StatusCode::OK; } -static Status validateNumberOfChannels(const std::shared_ptr& tensorInfo, +static Status validateNumberOfChannels(const std::shared_ptr& tensorInfo, const cv::Mat input, cv::Mat* firstBatchImage) { OVMS_PROFILE_FUNCTION(); @@ -162,7 +165,7 @@ static Status validateResolutionAgainstFirstBatchImage(const cv::Mat input, cv:: return StatusCode::BINARY_IMAGES_RESOLUTION_MISMATCH; } -static bool checkBatchSizeMismatch(const std::shared_ptr& tensorInfo, +static bool checkBatchSizeMismatch(const std::shared_ptr& tensorInfo, const int batchSize) { OVMS_PROFILE_FUNCTION(); if (!tensorInfo->getBatchSize().has_value() || batchSize == 0) { @@ -171,7 +174,7 @@ static bool checkBatchSizeMismatch(const std::shared_ptr& tensorInfo return !tensorInfo->getBatchSize().value().match(batchSize); } -static Status validateInput(const std::shared_ptr& tensorInfo, const cv::Mat input, cv::Mat* firstBatchImage, bool enforceResolutionAlignment) { +static Status validateInput(const std::shared_ptr& tensorInfo, const cv::Mat input, cv::Mat* firstBatchImage, bool enforceResolutionAlignment) { // Binary inputs are supported for any endpoint that is compatible with N...HWC layout. // With unknown layout, there is no way to deduce expected endpoint input resolution. // This forces binary utility to create tensors with resolution inherited from first batch of binary input image (request). 
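// Illustrative sketch: the raw_input_contents layout parsed by the getNumberOfInputs() and
// getInputs() helpers below is a sequence of [4-byte length][payload bytes] records.
// The packRawInputContents helper is hypothetical and only mirrors that parsing loop.
#include <cstdint>
#include <string>
#include <vector>

static std::string packRawInputContents(const std::vector<std::string>& blobs) {
    std::string buffer;
    for (const auto& blob : blobs) {
        uint32_t len = static_cast<uint32_t>(blob.size());
        buffer.append(reinterpret_cast<const char*>(&len), sizeof(len));  // 4-byte length prefix
        buffer.append(blob);                                              // payload bytes
    }
    return buffer;  // e.g. two encoded JPEG buffers packed for a single BYTES input
}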
@@ -187,7 +190,7 @@ static Status validateInput(const std::shared_ptr& tensorInfo, const return validateNumberOfChannels(tensorInfo, input, firstBatchImage); } -static Status validateTensor(const std::shared_ptr& tensorInfo, +static Status validateTensor(const std::shared_ptr& tensorInfo, const tensorflow::TensorProto& src, const std::string* buffer) { OVMS_PROFILE_FUNCTION(); @@ -218,7 +221,16 @@ static Status validateTensor(const std::shared_ptr& tensorInfo, return StatusCode::OK; } -static Status validateTensor(const std::shared_ptr& tensorInfo, +inline static int getNumberOfInputs(const std::string* buffer) { + int32_t batchSize; + size_t width; + auto status = getRawInputContentsBatchSizeAndWidth(*buffer, batchSize, width); + if (!status.ok()) + return 0; + return batchSize; +} + +static Status validateTensor(const std::shared_ptr& tensorInfo, const ::KFSRequest::InferInputTensor& src, const std::string* buffer) { OVMS_PROFILE_FUNCTION(); @@ -234,7 +246,7 @@ static Status validateTensor(const std::shared_ptr& tensorInfo, return StatusCode::INVALID_SHAPE; } - size_t batchSize = !rawInputsContentsUsed ? src.contents().bytes_contents_size() : 1; + size_t batchSize = !rawInputsContentsUsed ? src.contents().bytes_contents_size() : getNumberOfInputs(buffer); if (checkBatchSizeMismatch(tensorInfo, batchSize)) { SPDLOG_DEBUG("Input: {} request batch size is incorrect. Expected: {} Actual: {}", tensorInfo->getMappedName(), @@ -260,7 +272,7 @@ static Status validateTensor(const std::shared_ptr& tensorInfo, return StatusCode::OK; } -static Dimension getTensorInfoHeightDim(const std::shared_ptr& tensorInfo) { +static Dimension getTensorInfoHeightDim(const std::shared_ptr& tensorInfo) { size_t numberOfShapeDimensions = tensorInfo->getShape().size(); if (numberOfShapeDimensions < 4 || numberOfShapeDimensions > 5) { throw std::logic_error("wrong number of shape dimensions"); @@ -269,7 +281,7 @@ static Dimension getTensorInfoHeightDim(const std::shared_ptr& tenso return tensorInfo->getShape()[position]; } -static Dimension getTensorInfoWidthDim(const std::shared_ptr& tensorInfo) { +static Dimension getTensorInfoWidthDim(const std::shared_ptr& tensorInfo) { size_t numberOfShapeDimensions = tensorInfo->getShape().size(); if (numberOfShapeDimensions < 4 || numberOfShapeDimensions > 5) { throw std::logic_error("wrong number of shape dimensions"); @@ -307,7 +319,7 @@ static void updateTargetResolution(Dimension& height, Dimension& width, const cv } } -static bool isResizeSupported(const std::shared_ptr& tensorInfo) { +static bool isResizeSupported(const std::shared_ptr& tensorInfo) { for (const auto& dim : tensorInfo->getShape()) { if (dim.isAny()) { return false; @@ -337,8 +349,27 @@ inline static int getBinaryInputsSize(const ::KFSRequest::InferInputTensor& tens return tensor.contents().bytes_contents_size(); } +inline static Status getInputs(const std::string* buffer, std::vector& inputs) { + if (buffer == nullptr) { + return StatusCode::OK; + } + size_t offset = 0; + while (offset + sizeof(uint32_t) <= buffer->size()) { + uint64_t inputSize = *(reinterpret_cast(buffer->data() + offset)); + offset += sizeof(uint32_t); + if (offset + inputSize > buffer->size()) + break; + inputs.push_back(buffer->substr(offset, inputSize)); + offset += inputSize; + } + if (offset != buffer->size()) { + return StatusCode::IMAGE_PARSING_FAILED; + } + return StatusCode::OK; +} + template -static Status convertTensorToMatsMatchingTensorInfo(const TensorType& src, std::vector& images, const std::shared_ptr& tensorInfo, const 
std::string* buffer) { +static Status convertTensorToMatsMatchingTensorInfo(const TensorType& src, std::vector& images, const std::shared_ptr& tensorInfo, const std::string* buffer) { OVMS_PROFILE_FUNCTION(); Dimension targetHeight = getTensorInfoHeightDim(tensorInfo); Dimension targetWidth = getTensorInfoWidthDim(tensorInfo); @@ -348,9 +379,14 @@ static Status convertTensorToMatsMatchingTensorInfo(const TensorType& src, std:: bool enforceResolutionAlignment = !resizeSupported; bool rawInputsContentsUsed = (buffer != nullptr); - int numberOfInputs = (!rawInputsContentsUsed ? getBinaryInputsSize(src) : 1); + std::vector inputs; + auto status = getInputs(buffer, inputs); + if (status != StatusCode::OK) { + return status; + } + int numberOfInputs = (!rawInputsContentsUsed ? getBinaryInputsSize(src) : inputs.size()); for (int i = 0; i < numberOfInputs; i++) { - cv::Mat image = convertStringToMat(!rawInputsContentsUsed ? getBinaryInput(src, i) : *buffer); + cv::Mat image = convertStringToMat(!rawInputsContentsUsed ? getBinaryInput(src, i) : inputs[i]); if (image.data == nullptr) return StatusCode::IMAGE_PARSING_FAILED; cv::Mat* firstImage = images.size() == 0 ? nullptr : &images.at(0); @@ -394,7 +430,7 @@ static Status convertTensorToMatsMatchingTensorInfo(const TensorType& src, std:: return StatusCode::OK; } -static shape_t getShapeFromImages(const std::vector& images, const std::shared_ptr& tensorInfo) { +static shape_t getShapeFromImages(const std::vector& images, const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); shape_t dims; dims.push_back(images.size()); @@ -407,7 +443,7 @@ static shape_t getShapeFromImages(const std::vector& images, const std: return dims; } -static ov::Tensor createTensorFromMats(const std::vector& images, const std::shared_ptr& tensorInfo) { +static ov::Tensor createTensorFromMats(const std::vector& images, const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); ov::Shape shape = getShapeFromImages(images, tensorInfo); ov::element::Type precision = tensorInfo->getOvPrecision(); @@ -420,7 +456,7 @@ static ov::Tensor createTensorFromMats(const std::vector& images, const return tensor; } -static ov::Tensor convertMatsToTensor(std::vector& images, const std::shared_ptr& tensorInfo) { +static ov::Tensor convertMatsToTensor(std::vector& images, const std::shared_ptr& tensorInfo) { OVMS_PROFILE_FUNCTION(); switch (tensorInfo->getPrecision()) { case ovms::Precision::FP32: @@ -443,24 +479,187 @@ static ov::Tensor convertMatsToTensor(std::vector& images, const std::s } template -static Status convertNativeFileFormatRequestTensorToOVTensor(const TensorType& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer) { +static Status convertNativeFileFormatRequestTensorToOVTensor(const TensorType& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer) { OVMS_PROFILE_FUNCTION(); auto status = validateTensor(tensorInfo, src, buffer); if (status != StatusCode::OK) { + SPDLOG_DEBUG("Input native file format validation failed"); return status; } std::vector images; status = convertTensorToMatsMatchingTensorInfo(src, images, tensorInfo, buffer); if (!status.ok()) { + SPDLOG_DEBUG("Input native file format conversion failed"); return status; } tensor = convertMatsToTensor(images, tensorInfo); if (!tensor) { + SPDLOG_DEBUG("Input native file format conversion failed"); return StatusCode::IMAGE_PARSING_FAILED; } return StatusCode::OK; } -template Status convertNativeFileFormatRequestTensorToOVTensor(const 
tensorflow::TensorProto& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); -template Status convertNativeFileFormatRequestTensorToOVTensor<::KFSRequest::InferInputTensor>(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); +static Status convertStringRequestFromBufferToOVTensor2D(const tensorflow::TensorProto& src, ov::Tensor& tensor, const std::string* buffer) { + return StatusCode::NOT_IMPLEMENTED; +} + +static Status convertStringRequestFromBufferToOVTensor2D(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::string* buffer) { + size_t batchSize = 0; + size_t offset = 0; + size_t maxStringLength = 0; + while (offset + sizeof(uint32_t) <= buffer->size()) { + uint64_t inputSize = *(reinterpret_cast(buffer->data() + offset)); + offset += (sizeof(uint32_t) + inputSize); + maxStringLength = std::max(maxStringLength, inputSize); + batchSize++; + } + if (offset != buffer->size()) { + SPDLOG_DEBUG("Input string format conversion failed"); + return StatusCode::INVALID_STRING_INPUT; + } + size_t width = maxStringLength + 1; + offset = 0; + tensor = ov::Tensor(ov::element::Type_t::u8, ov::Shape{batchSize, width}); + for (size_t i = 0; i < batchSize; i++) { + uint64_t inputSize = *(reinterpret_cast(buffer->data() + offset)); + offset += sizeof(uint32_t); + auto data = tensor.data() + i * width; + std::memcpy(data, reinterpret_cast(buffer->data() + offset), inputSize); + for (size_t j = inputSize; j < width; j++) { + data[j] = 0; + } + offset += inputSize; + } + return StatusCode::OK; +} + +template +Status convertStringRequestToOVTensor2D( + const TensorType& src, + ov::Tensor& tensor, + const std::string* buffer) { + OVMS_PROFILE_FUNCTION(); + if (buffer != nullptr) { + return convertStringRequestFromBufferToOVTensor2D(src, tensor, buffer); + } + int batchSize = getBinaryInputsSize(src); + size_t maxStringLength = 0; + for (int i = 0; i < batchSize; i++) { + maxStringLength = std::max(maxStringLength, getBinaryInput(src, i).size()); + } + size_t width = maxStringLength + 1; + tensor = ov::Tensor(ov::element::Type_t::u8, ov::Shape{static_cast(batchSize), width}); + for (int i = 0; i < batchSize; i++) { + std::memcpy( + tensor.data() + i * width, + reinterpret_cast(getBinaryInput(src, i).c_str()), + getBinaryInput(src, i).size()); + for (size_t j = getBinaryInput(src, i).size(); j < width; j++) { + tensor.data()[i * width + j] = 0; + } + } + return StatusCode::OK; +} + +static Status convertStringRequestFromBufferToOVTensor1D(const tensorflow::TensorProto& src, ov::Tensor& tensor, const std::string* buffer) { + return StatusCode::NOT_IMPLEMENTED; +} + +static Status convertStringRequestFromBufferToOVTensor1D(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::string* buffer) { + std::vector stringSizes; + uint32_t totalStringsLength = 0; + while (totalStringsLength + stringSizes.size() * sizeof(uint32_t) + sizeof(uint32_t) <= buffer->size()) { + uint32_t inputSize = *(reinterpret_cast(buffer->data() + totalStringsLength + stringSizes.size() * sizeof(uint32_t))); + stringSizes.push_back(inputSize); + totalStringsLength += inputSize; + } + size_t batchSize = stringSizes.size(); + if ((totalStringsLength + batchSize * sizeof(uint32_t)) != buffer->size()) { + SPDLOG_DEBUG("Input string format conversion failed"); + return StatusCode::INVALID_STRING_INPUT; + } + size_t metadataLength = sizeof(uint32_t) * (batchSize + 2); + size_t width = 
totalStringsLength + metadataLength; + tensor = ov::Tensor(ov::element::Type_t::u8, ov::Shape{width}); + uint32_t* data = reinterpret_cast(tensor.data()); + data[0] = static_cast(batchSize); + data[1] = 0; // first string start offset + unsigned char* condensedStringsStart = tensor.data() + metadataLength; + size_t tensorStringsOffset = 0; + for (size_t i = 0; i < stringSizes.size(); i++) { + data[i + 2] = data[i + 1] + stringSizes[i]; + std::memcpy(condensedStringsStart + tensorStringsOffset, reinterpret_cast(buffer->data() + (i + 1) * sizeof(uint32_t) + tensorStringsOffset), stringSizes[i]); + tensorStringsOffset += stringSizes[i]; + } + return StatusCode::OK; +} + +template +Status convertStringRequestToOVTensor1D(const TensorType& src, ov::Tensor& tensor, const std::string* buffer) { + if (buffer != nullptr) { + return convertStringRequestFromBufferToOVTensor1D(src, tensor, buffer); + } + int batchSize = getBinaryInputsSize(src); + size_t totalStringsLength = 0; + for (int i = 0; i < batchSize; i++) { + totalStringsLength += getBinaryInput(src, i).size(); + } + // space for metadata: + // - batch size (uint32_t) x 1 + // - first string start offset (uint32_t) x 1 + // - end offsets for each batch of string (uint32_t) x batchSize + size_t metadataLength = sizeof(uint32_t) * (batchSize + 2); + size_t width = totalStringsLength + metadataLength; + tensor = ov::Tensor(ov::element::Type_t::u8, ov::Shape{static_cast(width)}); + uint32_t* data = reinterpret_cast(tensor.data()); + data[0] = static_cast(batchSize); + data[1] = 0; // first string start offset + unsigned char* condensedStringsStart = tensor.data() + metadataLength; + for (int i = 0; i < batchSize; i++) { + // write end offset + data[i + 2] = data[i + 1] + getBinaryInput(src, i).size(); + // write the bytes + if (getBinaryInput(src, i).size()) { + std::memcpy( + condensedStringsStart + data[i + 1], + reinterpret_cast(getBinaryInput(src, i).c_str()), + getBinaryInput(src, i).size()); + } + } + return StatusCode::OK; +} + +template +Status convertOVTensor2DToStringResponse(const ov::Tensor& tensor, TensorType& dst) { + if (tensor.get_shape().size() != 2) { + return StatusCode::INTERNAL_ERROR; + } + if (tensor.get_element_type() != ov::element::Type_t::u8) { + return StatusCode::INTERNAL_ERROR; + } + size_t batchSize = tensor.get_shape()[0]; + size_t maxStringLen = tensor.get_shape()[1]; + setBatchSize(dst, batchSize); + setStringPrecision(dst); + for (size_t i = 0; i < batchSize; i++) { + const char* strStart = reinterpret_cast(tensor.data() + i * maxStringLen); + size_t strLen = strnlen(strStart, maxStringLen); + createOrGetString(dst, i).assign(strStart, strLen); + } + return StatusCode::OK; +} + +template Status convertNativeFileFormatRequestTensorToOVTensor(const tensorflow::TensorProto& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); +template Status convertNativeFileFormatRequestTensorToOVTensor<::KFSRequest::InferInputTensor>(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); + +template Status convertStringRequestToOVTensor2D(const tensorflow::TensorProto& src, ov::Tensor& tensor, const std::string* buffer); +template Status convertStringRequestToOVTensor2D<::KFSRequest::InferInputTensor>(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::string* buffer); + +template Status convertStringRequestToOVTensor1D(const tensorflow::TensorProto& src, ov::Tensor& tensor, const std::string* buffer); 
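// Worked example for the 1D layout built by convertStringRequestToOVTensor1D above, assuming
// the inputs {"abc", "", "de"}: width = 4 * (3 + 2) + 5 = 25 bytes. The first 20 bytes hold the
// uint32_t values [3, 0, 3, 3, 5] (batch size, first offset, then per-string end offsets) and
// the remaining 5 bytes are the condensed characters "abcde".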
+template Status convertStringRequestToOVTensor1D<::KFSRequest::InferInputTensor>(const ::KFSRequest::InferInputTensor& src, ov::Tensor& tensor, const std::string* buffer); + +template Status convertOVTensor2DToStringResponse(const ov::Tensor& tensor, tensorflow::TensorProto& dst); +template Status convertOVTensor2DToStringResponse<::KFSResponse::InferOutputTensor>(const ov::Tensor& tensor, ::KFSResponse::InferOutputTensor& dst); + } // namespace ovms diff --git a/src/tensor_conversion.hpp b/src/tensor_conversion.hpp new file mode 100644 index 0000000000..3ef4936a6f --- /dev/null +++ b/src/tensor_conversion.hpp @@ -0,0 +1,37 @@ +//***************************************************************************** +// Copyright 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once + +#include +#include + +#include "tensorinfo.hpp" + +namespace ovms { +class Status; +template +Status convertNativeFileFormatRequestTensorToOVTensor(const TensorType& src, ov::Tensor& tensor, const std::shared_ptr& tensorInfo, const std::string* buffer); + +template +Status convertStringRequestToOVTensor2D(const TensorType& src, ov::Tensor& tensor, const std::string* buffer); + +template +Status convertStringRequestToOVTensor1D(const TensorType& src, ov::Tensor& tensor, const std::string* buffer); + +template +Status convertOVTensor2DToStringResponse(const ov::Tensor& tensor, TensorType& dst); + +} // namespace ovms diff --git a/src/tensorinfo.cpp b/src/tensorinfo.cpp index e5e4c118d6..9e036e7b2b 100644 --- a/src/tensorinfo.cpp +++ b/src/tensorinfo.cpp @@ -23,49 +23,68 @@ #include "kfs_frontend/kfs_grpc_inference_service.hpp" #include "logging.hpp" +#include "stringutils.hpp" namespace ovms { +const std::string STRING_SERIALIZATION_HINT_NAME_SUFFIX = "_string"; + // in case we change behaviour for this constructor we may need to write additional tests for TensorInfo intersection / DAGs TensorInfo::TensorInfo(const std::string& name, const Precision& precision, const Shape& shape) : - name(name), - mapping(""), - precision(precision), - shape(shape), - layout(Layout::getDefaultLayout()) {} + TensorInfo(name, "", precision, shape, Layout::getDefaultLayout()) {} TensorInfo::TensorInfo(const std::string& name, const Precision& precision, const shape_t& shape) : - name(name), - mapping(""), - precision(precision), - shape(shape), - layout(Layout::getDefaultLayout()) { -} + TensorInfo(name, "", precision, shape, Layout::getDefaultLayout()) {} TensorInfo::TensorInfo(const std::string& name, const ovms::Precision& precision, const shape_t& shape, const Layout& layout) : - name(name), - mapping(""), - precision(precision), - shape(shape), - layout(layout) { -} + TensorInfo(name, "", precision, shape, layout) {} + +TensorInfo::TensorInfo(const std::string& name, + const std::string& mapping, + const Precision& precision, + const shape_t& shape) : + TensorInfo(name, mapping, precision, shape, 
Layout::getDefaultLayout()) {} TensorInfo::TensorInfo(const std::string& name, const ovms::Precision& precision, const Shape& shape, const Layout& layout) : - name(name), - mapping(""), - precision(precision), - shape(shape), - layout(layout) { + TensorInfo(name, "", precision, shape, layout) {} + +TensorInfo::ProcessingHint TensorInfo::getPreProcessingHint() const { + return this->preProcessingHint; +} + +TensorInfo::ProcessingHint TensorInfo::getPostProcessingHint() const { + return this->postProcessingHint; +} + +void TensorInfo::createProcessingHints() { + // Pre + size_t expectedDimsForImage = this->influencedByDemultiplexer ? 5 : 4; + if (this->shape.size() == 2 && this->precision == ovms::Precision::U8 && !this->influencedByDemultiplexer) { + this->preProcessingHint = TensorInfo::ProcessingHint::STRING_2D_U8; + } else if (this->shape.size() == 1 && this->precision == ovms::Precision::U8 && this->shape.at(0).isDynamic() && !this->influencedByDemultiplexer) { + this->preProcessingHint = TensorInfo::ProcessingHint::STRING_1D_U8; + } else if (this->shape.size() == expectedDimsForImage) { + this->preProcessingHint = TensorInfo::ProcessingHint::IMAGE; + } else { + this->preProcessingHint = TensorInfo::ProcessingHint::NO_PROCESSING; + } + + // Post + if (this->precision == ovms::Precision::U8 && this->shape.size() == 2 && endsWith(this->getMappedName(), STRING_SERIALIZATION_HINT_NAME_SUFFIX)) { + this->postProcessingHint = TensorInfo::ProcessingHint::STRING_2D_U8; + } else { + this->postProcessingHint = TensorInfo::ProcessingHint::NO_PROCESSING; + } } TensorInfo::TensorInfo(const std::string& name, @@ -73,12 +92,8 @@ TensorInfo::TensorInfo(const std::string& name, const ovms::Precision& precision, const shape_t& shape, const Layout& layout) : - name(name), - mapping(mapping), - precision(precision), - shape(shape), - layout(layout) { -} + TensorInfo(name, mapping, precision, Shape(shape), layout) {} + TensorInfo::TensorInfo(const std::string& name, const std::string& mapping, const ovms::Precision& precision, @@ -89,16 +104,7 @@ TensorInfo::TensorInfo(const std::string& name, precision(precision), shape(shape), layout(layout) { -} -TensorInfo::TensorInfo(const std::string& name, - const std::string& mapping, - const Precision& precision, - const shape_t& shape) : - name(name), - mapping(mapping), - precision(precision), - shape(shape), - layout(Layout::getDefaultLayout()) { + createProcessingHints(); } const std::string& TensorInfo::getName() const { @@ -109,18 +115,10 @@ const std::string& TensorInfo::getMappedName() const { return mapping.size() == 0 ? 
name : mapping; } -void TensorInfo::setMappedName(const std::string& mappedName) { - mapping = mappedName; -} - const Precision TensorInfo::getPrecision() const { return precision; } -void TensorInfo::setPrecision(const ovms::Precision& requestedPrecision) { - precision = requestedPrecision; -} - const std::string& TensorInfo::getPrecisionAsString(Precision precision) { return toString(precision); } @@ -145,26 +143,26 @@ bool TensorInfo::isInfluencedByDemultiplexer() const { return influencedByDemultiplexer; } -void TensorInfo::setShape(const Shape& shape) { - this->shape = shape; -} - const Shape& TensorInfo::getShape() const { return this->shape; } -void TensorInfo::setLayout(const Layout& layout) { - this->layout = layout; -} - -std::shared_ptr TensorInfo::createCopyWithNewShape(const Shape& shape) const { +std::shared_ptr TensorInfo::createCopyWithNewShape(const Shape& shape) const { auto copy = std::make_shared(*this); copy->shape = shape; copy->layout = Layout::getUnspecifiedLayout(); + copy->createProcessingHints(); + return copy; +} + +std::shared_ptr TensorInfo::createCopyWithNewMappedName(const std::string& mappedName) const { + auto copy = std::make_shared(*this); + copy->mapping = mappedName; + copy->createProcessingHints(); return copy; } -std::shared_ptr TensorInfo::createCopyWithDemultiplexerDimensionPrefix(const Dimension& dim) const { +std::shared_ptr TensorInfo::createCopyWithDemultiplexerDimensionPrefix(const Dimension& dim) const { auto copy = std::make_shared(*this); copy->influencedByDemultiplexer = true; copy->shape.emplace(copy->shape.begin(), dim); @@ -174,10 +172,11 @@ std::shared_ptr TensorInfo::createCopyWithDemultiplexerDimensionPref copy->layout.replace(batchPosition, 1, std::string(1, Layout::UNDEFINED_DIMENSION_CHAR)); } copy->layout = std::string(1, Layout::BATCH_DIMENSION_LETTER[0]) + copy->layout; + copy->createProcessingHints(); return copy; } -std::shared_ptr TensorInfo::createIntersection(const TensorInfo& other) { +std::shared_ptr TensorInfo::createIntersection(const TensorInfo& other) const { if (this->isTensorUnspecified()) return std::make_shared(other); if (other.isTensorUnspecified()) @@ -223,22 +222,7 @@ bool TensorInfo::isTensorUnspecified() const { (this->getShape() == Shape()); } -std::string TensorInfo::shapeToString(const shape_t& shape) { - std::ostringstream oss; - oss << "("; - size_t i = 0; - if (shape.size() > 0) { - for (; i < shape.size() - 1; i++) { - oss << shape[i] << ","; - } - oss << shape[i]; - } - oss << ")"; - - return oss.str(); -} - -std::shared_ptr TensorInfo::getUnspecifiedTensorInfo() { +std::shared_ptr TensorInfo::getUnspecifiedTensorInfo() { return std::make_shared("", Precision::UNDEFINED, Shape{}); } diff --git a/src/tensorinfo.hpp b/src/tensorinfo.hpp index 9a2c77ae08..d0a03b1e6f 100644 --- a/src/tensorinfo.hpp +++ b/src/tensorinfo.hpp @@ -36,41 +36,20 @@ namespace ovms { class TensorInfo; -using tensor_map_t = std::map>; +using tensor_map_t = std::map>; /** * @brief Class containing information about the tensor */ class TensorInfo { -protected: - /** - * @brief Input name - */ - std::string name; - - /** - * @brief Mapping name - */ - std::string mapping; - - Precision precision; - - /** - * @brief Model input shape - */ - Shape shape; - - /** - * @brief Tensor layout - */ - Layout layout; - - /** - * @brief Information if influenced by demultiplexer - */ - bool influencedByDemultiplexer = false; - public: + enum class ProcessingHint { + IMAGE, + STRING_1D_U8, + STRING_2D_U8, + NO_PROCESSING + }; + /** * @brief 
Construct a new Tensor Info object * @@ -145,7 +124,6 @@ class TensorInfo { * @return const std::string& */ const std::string& getMappedName() const; - void setMappedName(const std::string& mappedName); /** * @brief Get the Precision object @@ -154,18 +132,6 @@ class TensorInfo { */ const Precision getPrecision() const; - /** - * @brief Set the Precision object - * - * @return const InferenceEngine::Precision - */ - void setPrecision(const ovms::Precision& requestedPrecision); - - /** - * @brief Set the Layout object - */ - void setLayout(const Layout& layout); - ov::element::Type getOvPrecision() const; /** @@ -205,24 +171,57 @@ class TensorInfo { * @return shape */ const Shape& getShape() const; - void setShape(const Shape& shape); + + ProcessingHint getPreProcessingHint() const; + ProcessingHint getPostProcessingHint() const; bool isInfluencedByDemultiplexer() const; - std::shared_ptr createCopyWithNewShape(const Shape& shape) const; + std::shared_ptr createCopyWithNewShape(const Shape& shape) const; + std::shared_ptr createCopyWithNewMappedName(const std::string& mappedName) const; - std::shared_ptr createCopyWithDemultiplexerDimensionPrefix(const Dimension& dim) const; - std::shared_ptr createIntersection(const TensorInfo& other); + std::shared_ptr createCopyWithDemultiplexerDimensionPrefix(const Dimension& dim) const; + std::shared_ptr createIntersection(const TensorInfo& other) const; bool isTensorUnspecified() const; bool isTensorSpecEqual(const TensorInfo& other) const; - static std::string shapeToString(const shape_t& shape); - - static std::shared_ptr getUnspecifiedTensorInfo(); + static std::shared_ptr getUnspecifiedTensorInfo(); const std::optional getBatchSize() const; + +protected: + /** + * @brief Input name + */ + std::string name; + + /** + * @brief Mapping name + */ + std::string mapping; + + Precision precision; + + /** + * @brief Model input shape + */ + Shape shape; + + /** + * @brief Tensor layout + */ + Layout layout; + + /** + * @brief Information if influenced by demultiplexer + */ + bool influencedByDemultiplexer = false; + + void createProcessingHints(); + TensorInfo::ProcessingHint preProcessingHint = TensorInfo::ProcessingHint::NO_PROCESSING; + TensorInfo::ProcessingHint postProcessingHint = TensorInfo::ProcessingHint::NO_PROCESSING; }; } // namespace ovms diff --git a/src/test/c_api/config_metadata_all.json b/src/test/c_api/config_metadata_all.json new file mode 100644 index 0000000000..0d8b70ee8d --- /dev/null +++ b/src/test/c_api/config_metadata_all.json @@ -0,0 +1,51 @@ +{ + "model_config_list": [ + { + "config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + }, + { + "config": { + "name": "dummyDynamic", + "base_path": "/ovms/src/test/dummy", + "shape": {"b":"(-1,1:10)"} + } + }, + { + "config": { + "name": "add", + "base_path": "/ovms/src/test/add_two_inputs_model", + "shape": {"input1":"(1, 3)", + "input2":"(1, 3)"} + } + } + ], + "pipeline_config_list": [ + { + "name": "pipeline1Dummy", + "inputs": ["b"], + "nodes": [ + { + "name": "dummyNode", + "model_name": "dummy", + "type": "DL model", + "inputs": [ + {"b": {"node_name": "request", + "data_item": "b"}} + ], + "outputs": [ + {"data_item": "a", + "alias": "a"} + ] + } + ], + "outputs": [ + {"a": {"node_name": "dummyNode", + "data_item": "a"} + } + ] + } + ] +} diff --git a/src/test/c_api_test_utils.hpp b/src/test/c_api_test_utils.hpp new file mode 100644 index 0000000000..2fab12a388 --- /dev/null +++ b/src/test/c_api_test_utils.hpp @@ -0,0 +1,61 @@ 
+//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#pragma once +#include + +#include + +#define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + uint32_t code = 0; \ + const char* msg = nullptr; \ + OVMS_StatusGetCode(err, &code); \ + OVMS_StatusGetDetails(err, &msg); \ + std::string smsg(msg); \ + OVMS_StatusDelete(err); \ + EXPECT_EQ(0, code) << smsg; \ + ASSERT_EQ(err, nullptr) << smsg; \ + } \ + } + +#define ASSERT_CAPI_STATUS_NOT_NULL(C_API_CALL) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + OVMS_StatusDelete(err); \ + } else { \ + ASSERT_NE(err, nullptr); \ + } \ + } + +#define ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(C_API_CALL, EXPECTED_STATUS_CODE) \ + { \ + auto* err = C_API_CALL; \ + if (err != nullptr) { \ + uint32_t code = 0; \ + const char* details = nullptr; \ + ASSERT_EQ(OVMS_StatusGetCode(err, &code), nullptr); \ + ASSERT_EQ(OVMS_StatusGetDetails(err, &details), nullptr); \ + ASSERT_NE(details, nullptr); \ + ASSERT_EQ(code, static_cast(EXPECTED_STATUS_CODE)) \ + << std::string{"wrong code: "} + std::to_string(code) + std::string{"; details: "} << details; \ + OVMS_StatusDelete(err); \ + } else { \ + ASSERT_NE(err, nullptr); \ + } \ + } diff --git a/src/test/c_api_tests.cpp b/src/test/c_api_tests.cpp index dd6b2b8497..8c8aa3ea46 100644 --- a/src/test/c_api_tests.cpp +++ b/src/test/c_api_tests.cpp @@ -21,8 +21,10 @@ #include #include "../buffer.hpp" +#include "../capi_frontend/capi_utils.hpp" #include "../inferenceresponse.hpp" #include "../ovms.h" +#include "c_api_test_utils.hpp" #include "test_utils.hpp" using namespace ovms; @@ -44,57 +46,15 @@ static void testDefaultSingleModelOptions(ModelsSettingsImpl* modelsSettings) { EXPECT_EQ(modelsSettings->idleSequenceCleanup, std::nullopt); } -#define ASSERT_CAPI_STATUS_NULL(C_API_CALL) \ - { \ - auto* err = C_API_CALL; \ - if (err != nullptr) { \ - uint32_t code = 0; \ - const char* msg = nullptr; \ - OVMS_StatusGetCode(err, &code); \ - OVMS_StatusGetDetails(err, &msg); \ - std::string smsg(msg); \ - OVMS_StatusDelete(err); \ - EXPECT_EQ(0, code) << smsg; \ - ASSERT_EQ(err, nullptr) << smsg; \ - } \ - } - -#define ASSERT_CAPI_STATUS_NOT_NULL(C_API_CALL) \ - { \ - auto* err = C_API_CALL; \ - if (err != nullptr) { \ - OVMS_StatusDelete(err); \ - } else { \ - ASSERT_NE(err, nullptr); \ - } \ - } - -#define ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(C_API_CALL, EXPECTED_STATUS_CODE) \ - { \ - auto* err = C_API_CALL; \ - if (err != nullptr) { \ - uint32_t code = 0; \ - const char* details = nullptr; \ - ASSERT_EQ(OVMS_StatusGetCode(err, &code), nullptr); \ - ASSERT_EQ(OVMS_StatusGetDetails(err, &details), nullptr); \ - ASSERT_NE(details, nullptr); \ - ASSERT_EQ(code, static_cast(EXPECTED_STATUS_CODE)) \ - << std::string{"wrong code: "} + std::to_string(code) + 
std::string{"; details: "} << details; \ - OVMS_StatusDelete(err); \ - } else { \ - ASSERT_NE(err, nullptr); \ - } \ - } - const uint AVAILABLE_CORES = std::thread::hardware_concurrency(); -TEST(CApiConfigTest, MultiModelConfiguration) { +TEST(CAPIConfigTest, MultiModelConfiguration) { OVMS_ServerSettings* _serverSettings = nullptr; OVMS_ModelsSettings* _modelsSettings = nullptr; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsNew(nullptr), StatusCode::NONEXISTENT_SETTINGS); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsNew(nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsNew(&_serverSettings)); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsNew(nullptr), StatusCode::NONEXISTENT_SETTINGS); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsNew(nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsNew(&_modelsSettings)); ASSERT_NE(_serverSettings, nullptr); @@ -147,28 +107,28 @@ TEST(CApiConfigTest, MultiModelConfiguration) { ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetLogPath(_serverSettings, "/logs")); ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsSetConfigPath(_modelsSettings, "/config")); // check nullptr - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcPort(nullptr, 5555), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestPort(nullptr, 6666), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcWorkers(nullptr, 30), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcBindAddress(nullptr, "2.2.2.2"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcBindAddress(_serverSettings, nullptr), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestWorkers(nullptr, 31), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestBindAddress(nullptr, "3.3.3.3"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestBindAddress(_serverSettings, nullptr), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcChannelArguments(nullptr, "grpcargs"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcChannelArguments(_serverSettings, nullptr), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetFileSystemPollWaitSeconds(nullptr, 2), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(nullptr, 3), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(nullptr, 4), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCpuExtensionPath(nullptr, "/ovms/src/test"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCpuExtensionPath(_serverSettings, nullptr), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCacheDir(nullptr, "/tmp/cache"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCacheDir(_serverSettings, nullptr), 
StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogLevel(nullptr, OVMS_LOG_TRACE), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogPath(nullptr, "/logs"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogPath(_serverSettings, nullptr), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsSetConfigPath(nullptr, "/config"), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsSetConfigPath(_modelsSettings, nullptr), StatusCode::NONEXISTENT_STRING); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcPort(nullptr, 5555), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestPort(nullptr, 6666), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcWorkers(nullptr, 30), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcBindAddress(nullptr, "2.2.2.2"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcBindAddress(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestWorkers(nullptr, 31), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestBindAddress(nullptr, "3.3.3.3"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetRestBindAddress(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcChannelArguments(nullptr, "grpcargs"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetGrpcChannelArguments(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetFileSystemPollWaitSeconds(nullptr, 2), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(nullptr, 3), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(nullptr, 4), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCpuExtensionPath(nullptr, "/ovms/src/test"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCpuExtensionPath(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCacheDir(nullptr, "/tmp/cache"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetCacheDir(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogLevel(nullptr, OVMS_LOG_TRACE), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogPath(nullptr, "/logs"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsSetLogPath(_serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsSetConfigPath(nullptr, "/config"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsSetConfigPath(_modelsSettings, nullptr), 
StatusCode::NONEXISTENT_PTR); // Test non default values EXPECT_EQ(serverSettings->grpcPort, 5555); @@ -235,11 +195,11 @@ TEST(CApiConfigTest, MultiModelConfiguration) { OVMS_ServerSettingsDelete(_serverSettings); } -TEST(CApiConfigTest, SingleModelConfiguration) { +TEST(CAPIConfigTest, SingleModelConfiguration) { GTEST_SKIP() << "Use C-API to initialize in next stages, currently not supported"; } -TEST(CApiStartTest, InitializingMultipleServers) { +TEST(CAPIStartTest, InitializingMultipleServers) { OVMS_Server* srv1 = nullptr; OVMS_Server* srv2 = nullptr; @@ -249,14 +209,14 @@ TEST(CApiStartTest, InitializingMultipleServers) { OVMS_ServerDelete(srv1); } -TEST(CApiStartTest, StartFlow) { +TEST(CAPIStartTest, StartFlow) { OVMS_Server* srv = nullptr; OVMS_ServerSettings* serverSettings = nullptr; OVMS_ModelsSettings* modelsSettings = nullptr; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerNew(nullptr), StatusCode::NONEXISTENT_SERVER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsNew(nullptr), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsNew(nullptr), StatusCode::NONEXISTENT_SETTINGS); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerNew(nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerSettingsNew(nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ModelsSettingsNew(nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_ServerNew(&srv)); ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsNew(&serverSettings)); ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsNew(&modelsSettings)); @@ -287,18 +247,18 @@ TEST(CApiStartTest, StartFlow) { OVMS_ServerDelete(srv); } -TEST(CApiStatusTest, GetCodeAndDetails) { +TEST(CAPIStatusTest, GetCodeAndDetails) { std::unique_ptr s = std::make_unique( StatusCode::INTERNAL_ERROR, "custom message"); OVMS_Status* sts = reinterpret_cast(s.get()); uint32_t code = 0; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetCode(nullptr, &code), StatusCode::NONEXISTENT_STATUS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetCode(sts, nullptr), StatusCode::NONEXISTENT_NUMBER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetCode(nullptr, &code), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetCode(sts, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_StatusGetCode(sts, &code)); EXPECT_EQ(code, static_cast(StatusCode::INTERNAL_ERROR)); const char* details = nullptr; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetDetails(nullptr, &details), StatusCode::NONEXISTENT_STATUS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetDetails(sts, nullptr), StatusCode::NONEXISTENT_STRING); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetDetails(nullptr, &details), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_StatusGetDetails(sts, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_StatusGetDetails(sts, &details)); std::stringstream ss; ss << Status(StatusCode::INTERNAL_ERROR).string() << " - custom message"; @@ -306,11 +266,11 @@ TEST(CApiStatusTest, GetCodeAndDetails) { OVMS_StatusDelete(reinterpret_cast(s.release())); } -class CapiInference : public ::testing::Test {}; +class CAPIInference : public ::testing::Test {}; -TEST_F(CapiInference, TensorSetMovedBuffer) { +TEST_F(CAPIInference, TensorSetMovedBuffer) { constexpr size_t elementsCount = 2; - std::array shape{1, elementsCount}; + 
std::array shape{1, elementsCount}; InferenceTensor tensor(OVMS_DATATYPE_FP32, shape.data(), shape.size()); std::unique_ptr bufferNull; ASSERT_EQ(tensor.setBuffer(std::move(bufferNull)), ovms::StatusCode::OK); @@ -320,7 +280,7 @@ TEST_F(CapiInference, TensorSetMovedBuffer) { ASSERT_EQ(tensor.setBuffer(std::move(buffer)), ovms::StatusCode::DOUBLE_BUFFER_SET); } -TEST_F(CapiInference, Basic) { +TEST_F(CAPIInference, Basic) { ////////////////////// // start server ////////////////////// @@ -349,7 +309,7 @@ TEST_F(CapiInference, Basic) { ASSERT_NE(nullptr, request); // adding input - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); // setting buffer std::array data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; uint32_t notUsedNum = 0; @@ -370,14 +330,14 @@ TEST_F(CapiInference, Basic) { ASSERT_CAPI_STATUS_NULL(OVMS_Inference(cserver, request, &response)); // verify GetOutputCount uint32_t outputCount = 42; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutputCount(nullptr, &outputCount), StatusCode::NONEXISTENT_RESPONSE); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutputCount(response, nullptr), StatusCode::NONEXISTENT_NUMBER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutputCount(nullptr, &outputCount), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutputCount(response, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutputCount(response, &outputCount)); ASSERT_EQ(outputCount, 1); // verify GetParameterCount uint32_t parameterCount = 42; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameterCount(nullptr, ¶meterCount), StatusCode::NONEXISTENT_RESPONSE); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameterCount(response, nullptr), StatusCode::NONEXISTENT_NUMBER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameterCount(nullptr, ¶meterCount), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameterCount(response, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetParameterCount(response, ¶meterCount)); ASSERT_EQ(0, parameterCount); // verify GetOutput @@ -385,20 +345,20 @@ TEST_F(CapiInference, Basic) { size_t bytesize = 42; uint32_t outputId = 0; OVMS_DataType datatype = (OVMS_DataType)199; - const uint64_t* shape{nullptr}; - uint32_t dimCount = 42; + const int64_t* shape{nullptr}; + size_t dimCount = 42; OVMS_BufferType bufferType = (OVMS_BufferType)199; uint32_t deviceId = 42; const char* outputName{nullptr}; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(nullptr, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_RESPONSE); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, nullptr, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, nullptr, &shape, &dimCount, &voutputData, &bytesize, 
&bufferType, &deviceId), StatusCode::NONEXISTENT_NUMBER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, nullptr, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_TABLE); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, nullptr, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_NUMBER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, nullptr, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_DATA); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, nullptr, &bufferType, &deviceId), StatusCode::NONEXISTENT_NUMBER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, nullptr, &deviceId), StatusCode::NONEXISTENT_NUMBER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, nullptr), StatusCode::NONEXISTENT_NUMBER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(nullptr, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, nullptr, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, nullptr, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, nullptr, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, nullptr, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, nullptr, &bytesize, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, nullptr, &bufferType, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, nullptr, &deviceId), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetOutput(response, outputId, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId)); ASSERT_EQ(std::string(DUMMY_MODEL_OUTPUT_NAME), outputName); EXPECT_EQ(datatype, 
OVMS_DATATYPE_FP32); @@ -430,9 +390,9 @@ TEST_F(CapiInference, Basic) { // one will be removed together with request but without buffer attached // one will be removed with buffer directly // one will be removed without buffer directly - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITHOUT_BUFFER_REMOVED_WITH_REQUEST", OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITH_BUFFER_REMOVED_DIRECTLY", OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITHOUT_BUFFER_REMOVED_DIRECTLY", OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITHOUT_BUFFER_REMOVED_WITH_REQUEST", OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITH_BUFFER_REMOVED_DIRECTLY", OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, "INPUT_WITHOUT_BUFFER_REMOVED_DIRECTLY", OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, "INPUT_WITH_BUFFER_REMOVED_DIRECTLY", reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum)); // we will add buffer and remove it to check separate buffer removal ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, "INPUT_WITHOUT_BUFFER_REMOVED_DIRECTLY", reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum)); @@ -445,20 +405,20 @@ TEST_F(CapiInference, Basic) { // we will remove 1 of two parameters ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestRemoveParameter(request, "sequence_id")); // 2nd time should report error - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(request, "sequence_id"), StatusCode::NONEXISTENT_PARAMETER_FOR_REMOVAL); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(nullptr, "sequence_id"), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(request, nullptr), StatusCode::NONEXISTENT_STRING); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(request, "sequence_id"), StatusCode::NONEXISTENT_PARAMETER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(nullptr, "sequence_id"), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveParameter(request, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveInput(request, "NONEXISTENT_TENSOR"), StatusCode::NONEXISTENT_TENSOR_FOR_REMOVAL); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveInput(nullptr, "INPUT_WITHOUT_BUFFER_REMOVED_WITH_REQUEST"), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveInput(request, nullptr), StatusCode::NONEXISTENT_STRING); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveInput(nullptr, "INPUT_WITHOUT_BUFFER_REMOVED_WITH_REQUEST"), StatusCode::NONEXISTENT_PTR); + 
ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestRemoveInput(request, nullptr), StatusCode::NONEXISTENT_PTR); OVMS_InferenceRequestDelete(nullptr); OVMS_InferenceRequestDelete(request); OVMS_ServerDelete(cserver); } -TEST_F(CapiInference, NegativeInference) { +TEST_F(CAPIInference, NegativeInference) { // first start OVMS std::string port = "9000"; randomizePort(port); @@ -474,43 +434,43 @@ TEST_F(CapiInference, NegativeInference) { OVMS_Server* cserver = nullptr; ASSERT_CAPI_STATUS_NULL(OVMS_ServerNew(&cserver)); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(nullptr, serverSettings, modelsSettings), StatusCode::NONEXISTENT_SERVER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(cserver, nullptr, modelsSettings), StatusCode::NONEXISTENT_SETTINGS); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, nullptr), StatusCode::NONEXISTENT_SETTINGS); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(nullptr, serverSettings, modelsSettings), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(cserver, nullptr, modelsSettings), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, modelsSettings)); OVMS_InferenceRequest* request{nullptr}; OVMS_InferenceResponse* response = nullptr; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(nullptr, cserver, "dummy", 1), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(&request, cserver, nullptr, 1), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(&request, nullptr, "dummy", 1), StatusCode::NONEXISTENT_SERVER); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(nullptr, cserver, "dummy", 1), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(&request, cserver, nullptr, 1), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestNew(&request, nullptr, "dummy", 1), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, "dummy", 1)); ASSERT_NE(nullptr, request); // negative no inputs ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, &response), StatusCode::INVALID_NO_OF_INPUTS); // negative no input buffer - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(nullptr, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, nullptr, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, nullptr, DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_TABLE); - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); + 
ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(nullptr, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, nullptr, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, nullptr, DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); // fail with adding input second time - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size()), StatusCode::DOUBLE_TENSOR_INSERT); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size()), StatusCode::DOUBLE_TENSOR_INSERT); ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, &response), StatusCode::INVALID_CONTENT_SIZE); // setting buffer std::array data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; uint32_t notUsedNum = 0; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(nullptr, DUMMY_MODEL_INPUT_NAME, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(request, nullptr, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(request, DUMMY_MODEL_INPUT_NAME, nullptr, sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_DATA); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(nullptr, DUMMY_MODEL_INPUT_NAME, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(request, nullptr, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(request, DUMMY_MODEL_INPUT_NAME, nullptr, sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, DUMMY_MODEL_INPUT_NAME, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum)); ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestInputSetData(request, "NONEXISTENT_TENSOR", reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum), StatusCode::NONEXISTENT_TENSOR_FOR_SET_BUFFER); // add parameters const uint64_t sequenceId{42}; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(nullptr, "sequence_id", OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId)), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(request, 
nullptr, OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId)), StatusCode::NONEXISTENT_STRING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(request, "sequence_id", OVMS_DATATYPE_U64, nullptr, sizeof(sequenceId)), StatusCode::NONEXISTENT_DATA); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(nullptr, "sequence_id", OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId)), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(request, nullptr, OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId)), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(request, "sequence_id", OVMS_DATATYPE_U64, nullptr, sizeof(sequenceId)), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddParameter(request, "sequence_id", OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId))); // 2nd time should get error ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddParameter(request, "sequence_id", OVMS_DATATYPE_U64, reinterpret_cast(&sequenceId), sizeof(sequenceId)), StatusCode::DOUBLE_PARAMETER_INSERT); @@ -518,9 +478,9 @@ TEST_F(CapiInference, NegativeInference) { ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddParameter(request, "sequence_control_input", OVMS_DATATYPE_U32, reinterpret_cast(&sequenceControl), sizeof(sequenceControl))); // verify passing nullptrs - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(nullptr, request, &response), StatusCode::NONEXISTENT_SERVER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, nullptr, &response), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, nullptr), StatusCode::NONEXISTENT_RESPONSE); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(nullptr, request, &response), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, nullptr, &response), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, nullptr), StatusCode::NONEXISTENT_PTR); // negative inference with non existing model OVMS_InferenceRequest* requestNoModel{nullptr}; @@ -529,8 +489,8 @@ TEST_F(CapiInference, NegativeInference) { // negative no model ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, &response), StatusCode::PIPELINE_DEFINITION_NAME_MISSING); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(nullptr, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_REQUEST); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, requestNoModel, &reponseNoModel), StatusCode::NONEXISTENT_REQUEST); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceRequestAddInput(nullptr, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size()), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, requestNoModel, &reponseNoModel), StatusCode::NONEXISTENT_PTR); OVMS_InferenceRequestDelete(requestNoModel); OVMS_ServerDelete(nullptr); @@ -540,7 +500,7 @@ TEST_F(CapiInference, NegativeInference) { namespace { const std::string MODEL_NAME{"SomeModelName"}; -const uint64_t MODEL_VERSION{42}; +const int64_t MODEL_VERSION{42}; const std::string 
PARAMETER_NAME{"sequence_id"}; const OVMS_DataType PARAMETER_DATATYPE{OVMS_DATATYPE_I32}; @@ -555,10 +515,10 @@ constexpr size_t INPUT_DATA_BYTESIZE{INPUT_DATA.size() * sizeof(float)}; const OVMS_DataType DATATYPE{OVMS_DATATYPE_FP32}; } // namespace -TEST_F(CapiInference, ResponseRetrieval) { +TEST_F(CAPIInference, ResponseRetrieval) { auto cppResponse = std::make_unique(MODEL_NAME, MODEL_VERSION); // add output - std::array cppOutputShape{1, DUMMY_MODEL_INPUT_SIZE}; + std::array cppOutputShape{1, DUMMY_MODEL_INPUT_SIZE}; auto cppStatus = cppResponse->addOutput(INPUT_NAME.c_str(), DATATYPE, cppOutputShape.data(), cppOutputShape.size()); ASSERT_EQ(cppStatus, StatusCode::OK) << cppStatus.string(); InferenceTensor* cpptensor = nullptr; @@ -588,9 +548,9 @@ TEST_F(CapiInference, ResponseRetrieval) { // verify get Parameter OVMS_DataType parameterDatatype = OVMS_DATATYPE_FP32; const void* parameterData{nullptr}; - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(nullptr, 0, ¶meterDatatype, ¶meterData), StatusCode::NONEXISTENT_RESPONSE); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 0, nullptr, ¶meterData), StatusCode::NONEXISTENT_NUMBER); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 0, ¶meterDatatype, nullptr), StatusCode::NONEXISTENT_DATA); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(nullptr, 0, ¶meterDatatype, ¶meterData), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 0, nullptr, ¶meterData), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 0, ¶meterDatatype, nullptr), StatusCode::NONEXISTENT_PTR); ASSERT_CAPI_STATUS_NULL(OVMS_InferenceResponseGetParameter(response, 0, ¶meterDatatype, ¶meterData)); ASSERT_EQ(parameterDatatype, OVMS_DATATYPE_U64); EXPECT_EQ(0, std::memcmp(parameterData, (void*)&seqId, sizeof(seqId))); @@ -599,8 +559,8 @@ TEST_F(CapiInference, ResponseRetrieval) { size_t bytesize = 42; uint32_t outputId = 0; OVMS_DataType datatype = (OVMS_DataType)199; - const uint64_t* shape{nullptr}; - uint32_t dimCount = 42; + const int64_t* shape{nullptr}; + size_t dimCount = 42; OVMS_BufferType bufferType = (OVMS_BufferType)199; uint32_t deviceId = 42; const char* outputName{nullptr}; @@ -626,7 +586,7 @@ TEST_F(CapiInference, ResponseRetrieval) { ASSERT_EQ(cppStatus, StatusCode::OK) << cppStatus.string(); ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetOutput(response, outputId + 1, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId), StatusCode::INTERNAL_ERROR); // negative scenario nonexistsing parameter - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 123, ¶meterDatatype, ¶meterData), StatusCode::NONEXISTENT_PARAMETER_FOR_REMOVAL); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_InferenceResponseGetParameter(response, 123, ¶meterDatatype, ¶meterData), StatusCode::NONEXISTENT_PARAMETER); // final cleanup // we release unique_ptr ownership here so that we can free it safely via C-API cppResponse.release(); @@ -634,7 +594,193 @@ TEST_F(CapiInference, ResponseRetrieval) { OVMS_InferenceResponseDelete(response); } -TEST_F(CapiInference, CallInferenceServerNotStarted) { +class CAPIMetadata : public ::testing::Test { +protected: + static OVMS_Server* cserver; + +public: + static void SetUpTestSuite() { + std::string port = 
"9000"; + randomizePort(port); + // prepare options + OVMS_ServerSettings* serverSettings = nullptr; + OVMS_ModelsSettings* modelsSettings = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsNew(&serverSettings)); + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsNew(&modelsSettings)); + ASSERT_NE(serverSettings, nullptr); + ASSERT_NE(modelsSettings, nullptr); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetGrpcPort(serverSettings, std::stoi(port))); + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsSetConfigPath(modelsSettings, "/ovms/src/test/c_api/config_metadata_all.json")); + cserver = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_ServerNew(&cserver)); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, modelsSettings)); + OVMS_ModelsSettingsDelete(modelsSettings); + OVMS_ServerSettingsDelete(serverSettings); + } + static void TearDownTestSuite() { + OVMS_ServerDelete(cserver); + cserver = nullptr; + } + void checkMetadata(const std::string& servableName, + int64_t servableVersion, + const tensor_map_t& expectedInputsInfo, + const tensor_map_t& expectedOutputsInfo) { + OVMS_ServableMetadata* servableMetadata = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_GetServableMetadata(cserver, servableName.c_str(), servableVersion, &servableMetadata)); + ASSERT_NE(nullptr, servableMetadata); + uint32_t inputCount = 42; + uint32_t outputCount = 42; + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInputCount(servableMetadata, &inputCount)); + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetOutputCount(servableMetadata, &outputCount)); + ASSERT_EQ(expectedInputsInfo.size(), inputCount); + ASSERT_EQ(expectedOutputsInfo.size(), outputCount); + + uint32_t id = 0; + OVMS_DataType datatype = (OVMS_DataType)199; + int64_t* shapeMin{nullptr}; + int64_t* shapeMax{nullptr}; + size_t dimCount = 42; + const char* tensorName{nullptr}; + std::set inputNames; + std::set outputNames; + for (id = 0; id < inputCount; ++id) { + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax)); + auto it = expectedInputsInfo.find(tensorName); + ASSERT_NE(it, expectedInputsInfo.end()); + inputNames.insert(tensorName); + EXPECT_EQ(datatype, ovms::getPrecisionAsOVMSDataType(it->second->getPrecision())); + auto& expectedShape = it->second->getShape(); + ASSERT_EQ(expectedShape.size(), dimCount); + for (size_t i = 0; i < expectedShape.size(); ++i) { + EXPECT_EQ(expectedShape[i], ovms::Dimension(shapeMin[i], shapeMax[i])); + } + } + EXPECT_EQ(inputNames.size(), inputCount); + for (id = 0; id < outputCount; ++id) { + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax)); + auto it = expectedOutputsInfo.find(tensorName); + ASSERT_NE(it, expectedOutputsInfo.end()); + outputNames.insert(tensorName); + EXPECT_EQ(datatype, ovms::getPrecisionAsOVMSDataType(it->second->getPrecision())); + auto& expectedShape = it->second->getShape(); + ASSERT_EQ(expectedShape.size(), dimCount); + for (size_t i = 0; i < expectedShape.size(); ++i) { + EXPECT_EQ(expectedShape[i], ovms::Dimension(shapeMin[i], shapeMax[i])); + } + } + EXPECT_EQ(outputNames.size(), outputCount); + const ov::AnyMap* servableMetadataRtInfo{nullptr}; + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInfo(servableMetadata, reinterpret_cast(&servableMetadataRtInfo))); + ASSERT_NE(nullptr, servableMetadataRtInfo); + EXPECT_EQ(0, servableMetadataRtInfo->size()); + OVMS_ServableMetadataDelete(servableMetadata); 
+ } + + void checkServableAsDummy(const std::string& servableName) { + model_version_t servableVersion = 1; + ovms::tensor_map_t inputsInfo({{DUMMY_MODEL_INPUT_NAME, + std::make_shared(DUMMY_MODEL_INPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10})}}); + ovms::tensor_map_t outputsInfo({{DUMMY_MODEL_OUTPUT_NAME, + std::make_shared(DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10})}}); + checkMetadata(servableName, servableVersion, inputsInfo, outputsInfo); + } +}; +OVMS_Server* CAPIMetadata::cserver = nullptr; + +TEST_F(CAPIMetadata, Negative) { + OVMS_ServableMetadata* servableMetadata = nullptr; + const std::string servableName = "dummy"; + model_version_t servableVersion = 1; + // nullptr tests + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_GetServableMetadata(nullptr, servableName.c_str(), servableVersion, &servableMetadata), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_GetServableMetadata(cserver, nullptr, servableVersion, &servableMetadata), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_GetServableMetadata(cserver, servableName.c_str(), servableVersion, nullptr), StatusCode::NONEXISTENT_PTR); + // negative missing servable + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_GetServableMetadata(cserver, "NONEXISTENT_NAME", servableVersion, &servableMetadata), StatusCode::MODEL_NAME_MISSING); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_GetServableMetadata(cserver, servableName.c_str(), -1, &servableMetadata), StatusCode::MODEL_VERSION_MISSING); + // proper call + ASSERT_CAPI_STATUS_NULL(OVMS_GetServableMetadata(cserver, servableName.c_str(), servableVersion, &servableMetadata)); + ASSERT_NE(nullptr, servableMetadata); + uint32_t inputCount = 42; + uint32_t outputCount = 42; + // OVMS_ServableMetadataGetInputCount + // negative + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInputCount(nullptr, &inputCount), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInputCount(servableMetadata, nullptr), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutputCount(nullptr, &outputCount), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutputCount(servableMetadata, nullptr), StatusCode::NONEXISTENT_PTR); + + // check inputs + uint32_t id = 0; + OVMS_DataType datatype = (OVMS_DataType)199; + int64_t* shapeMin{nullptr}; + int64_t* shapeMax{nullptr}; + size_t dimCount = 42; + const char* tensorName{nullptr}; + // OVMS_ServableMetadataGetInput + // negative + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(nullptr, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, 412, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_TENSOR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, id, nullptr, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, nullptr, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, &datatype, nullptr, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + 
ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, &datatype, &dimCount, nullptr, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, nullptr), StatusCode::NONEXISTENT_PTR); + // check outputs + id = 0; + datatype = (OVMS_DataType)199; + shapeMin = nullptr; + shapeMax = nullptr; + dimCount = 42; + tensorName = nullptr; + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(nullptr, id, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, 412, &tensorName, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_TENSOR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, id, nullptr, &datatype, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, nullptr, &dimCount, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, &datatype, nullptr, &shapeMin, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, nullptr, &shapeMax), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetOutput(servableMetadata, id, &tensorName, &datatype, &dimCount, &shapeMin, nullptr), StatusCode::NONEXISTENT_PTR); + // check info + const ov::AnyMap* servableMetadataRtInfo; + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInfo(nullptr, reinterpret_cast(&servableMetadataRtInfo)), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ServableMetadataGetInfo(servableMetadata, nullptr), StatusCode::NONEXISTENT_PTR); + + OVMS_ServableMetadataDelete(nullptr); +} + +TEST_F(CAPIMetadata, BasicDummy) { + const std::string servableName{"dummy"}; + checkServableAsDummy(servableName); +} + +TEST_F(CAPIMetadata, BasicDummyDag) { + const std::string servableName{"pipeline1Dummy"}; + checkServableAsDummy(servableName); +} + +TEST_F(CAPIMetadata, DummyDynamicShapes) { + const std::string servableName = "dummyDynamic"; + model_version_t servableVersion = 1; + ovms::tensor_map_t inputsInfo({{DUMMY_MODEL_INPUT_NAME, + std::make_shared(DUMMY_MODEL_INPUT_NAME, ovms::Precision::FP32, ovms::Shape{ovms::Dimension::any(), {1, 10}})}}); + ovms::tensor_map_t outputsInfo({{DUMMY_MODEL_OUTPUT_NAME, + std::make_shared(DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{ovms::Dimension::any(), {1, 10}})}}); + checkMetadata(servableName, servableVersion, inputsInfo, outputsInfo); +} + +TEST_F(CAPIMetadata, TwoInputsAddModel) { + const std::string servableName = "add"; + model_version_t servableVersion = 1; + ovms::tensor_map_t inputsInfo({{SUM_MODEL_INPUT_NAME_1, + std::make_shared(SUM_MODEL_INPUT_NAME_1, ovms::Precision::FP32, ovms::Shape{1, 3})}, + {SUM_MODEL_INPUT_NAME_2, + std::make_shared(SUM_MODEL_INPUT_NAME_2, ovms::Precision::FP32, ovms::Shape{1, 3})}}); + ovms::tensor_map_t outputsInfo({{SUM_MODEL_OUTPUT_NAME, + std::make_shared(SUM_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 3})}}); + checkMetadata(servableName, 
servableVersion, inputsInfo, outputsInfo); +} + +TEST_F(CAPIInference, CallInferenceServerNotStarted) { OVMS_Server* cserver = nullptr; OVMS_InferenceRequest* request{nullptr}; OVMS_InferenceResponse* response = nullptr; @@ -642,17 +788,17 @@ TEST_F(CapiInference, CallInferenceServerNotStarted) { ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestNew(&request, cserver, "dummy", 1)); ASSERT_NE(nullptr, cserver); ASSERT_NE(nullptr, request); - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); std::array data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; uint32_t notUsedNum = 0; ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, DUMMY_MODEL_INPUT_NAME, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum)); - ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, &response), StatusCode::SERVER_NOT_READY_FOR_INFERENCE); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_Inference(cserver, request, &response), StatusCode::SERVER_NOT_READY); OVMS_InferenceResponseDelete(response); OVMS_InferenceRequestDelete(request); OVMS_ServerDelete(cserver); } -class CapiDagInference : public ::testing::Test { +class CAPIDagInference : public ::testing::Test { protected: OVMS_ServerSettings* serverSettings = nullptr; OVMS_ModelsSettings* modelsSettings = nullptr; @@ -667,8 +813,8 @@ class CapiDagInference : public ::testing::Test { size_t bytesize = 42; uint32_t outputId = 0; OVMS_DataType datatype = (OVMS_DataType)199; - const uint64_t* shape{nullptr}; - uint32_t dimCount = 42; + const int64_t* shape{nullptr}; + size_t dimCount = 42; OVMS_BufferType bufferType = (OVMS_BufferType)199; uint32_t deviceId = 42; const char* outputName{nullptr}; @@ -706,7 +852,7 @@ class CapiDagInference : public ::testing::Test { } }; -TEST_F(CapiDagInference, BasicDummyDag) { +TEST_F(CAPIDagInference, BasicDummyDag) { ////////////////////// // start server ////////////////////// @@ -720,7 +866,7 @@ TEST_F(CapiDagInference, BasicDummyDag) { ASSERT_NE(nullptr, request); // adding input - ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, reinterpret_cast(DUMMY_MODEL_SHAPE.data()), DUMMY_MODEL_SHAPE.size())); + ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, DUMMY_MODEL_SHAPE.data(), DUMMY_MODEL_SHAPE.size())); // setting buffer std::array data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestInputSetData(request, DUMMY_MODEL_INPUT_NAME, reinterpret_cast(data.data()), sizeof(float) * data.size(), OVMS_BUFFERTYPE_CPU, notUsedNum)); @@ -755,7 +901,7 @@ TEST_F(CapiDagInference, BasicDummyDag) { OVMS_InferenceRequestDelete(request); } -TEST_F(CapiDagInference, DynamicEntryDummyDag) { +TEST_F(CAPIDagInference, DynamicEntryDummyDag) { ////////////////////// // start server ////////////////////// @@ -771,7 +917,7 @@ TEST_F(CapiDagInference, DynamicEntryDummyDag) { // adding input const size_t demultiplyCount = 3; - std::array inputShape{demultiplyCount, 1, 10}; + std::array inputShape{demultiplyCount, 1, 10}; ASSERT_CAPI_STATUS_NULL(OVMS_InferenceRequestAddInput(request, DUMMY_MODEL_INPUT_NAME, OVMS_DATATYPE_FP32, inputShape.data(), inputShape.size())); // setting 
buffer std::array data; @@ -811,3 +957,13 @@ TEST_F(CapiDagInference, DynamicEntryDummyDag) { OVMS_InferenceResponseDelete(response); OVMS_InferenceRequestDelete(request); } + +TEST(CAPI, ApiVersion) { + uint32_t major = 9999, minor = 9999; + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ApiVersion(nullptr, &minor), StatusCode::NONEXISTENT_PTR); + ASSERT_CAPI_STATUS_NOT_NULL_EXPECT_CODE(OVMS_ApiVersion(&major, nullptr), StatusCode::NONEXISTENT_PTR); + + ASSERT_CAPI_STATUS_NULL(OVMS_ApiVersion(&major, &minor)); + ASSERT_EQ(major, OVMS_API_VERSION_MAJOR); + ASSERT_EQ(minor, OVMS_API_VERSION_MINOR); +} diff --git a/src/test/capi_predict_validation_test.cpp b/src/test/capi_predict_validation_test.cpp index 0e69b40c9d..dbaf0d4a19 100644 --- a/src/test/capi_predict_validation_test.cpp +++ b/src/test/capi_predict_validation_test.cpp @@ -67,13 +67,13 @@ class CAPIPredictValidation : public ::testing::Test { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); } }; @@ -86,13 +86,13 @@ TEST_F(CAPIPredictValidation, ValidRequest) { TEST_F(CAPIPredictValidation, InvalidPrecision) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, static_cast(99)}}, + std::tuple{{1, 224, 224, 3}, static_cast(99)}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_PRECISION) << status.string(); @@ -107,7 +107,7 @@ TEST_F(CAPIPredictValidation, RequestNotEnoughInputs) { TEST_F(CAPIPredictValidation, RequestTooManyInputs) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_NO_OF_INPUTS) << status.string(); @@ -116,13 +116,13 @@ TEST_F(CAPIPredictValidation, RequestTooManyInputs) { TEST_F(CAPIPredictValidation, RequestMissingInputName) { preparePredictRequest(request, {{"BadInput_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto 
status = instance->mockValidate(&request); @@ -133,7 +133,7 @@ TEST_F(CAPIPredictValidation, RequestWrongInputName) { request.removeInput("Input_U16_1_2_8_4_NCHW"); preparePredictRequest(request, {{"BADInput_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_NO_OF_INPUTS) << status.string(); @@ -142,13 +142,13 @@ TEST_F(CAPIPredictValidation, RequestWrongInputName) { TEST_F(CAPIPredictValidation, RequestTooManyShapeDimensions) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -158,13 +158,13 @@ TEST_F(CAPIPredictValidation, RequestTooManyShapeDimensions) { TEST_F(CAPIPredictValidation, RequestNotEnoughShapeDimensions) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62, 5}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62, 5}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16, 6}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16, 6}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4, 5}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4, 5}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -174,13 +174,13 @@ TEST_F(CAPIPredictValidation, RequestNotEnoughShapeDimensions) { TEST_F(CAPIPredictValidation, RequestWrongBatchSize) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{2, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{2, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{2, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{2, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{2, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{2, 2, 8, 4}, ovms::Precision::U16}}}, requestData); // dim(0) is batch size auto status = instance->mockValidate(&request); @@ -191,13 +191,13 @@ TEST_F(CAPIPredictValidation, RequestWrongBatchSizeAuto) { modelConfig.setBatchingParams("auto"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{2, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{2, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{2, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{2, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{2, 2, 8, 4}, ovms::Precision::U16}}}, + 
std::tuple{{2, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::BATCHSIZE_CHANGE_REQUIRED) << status.string(); @@ -254,13 +254,47 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValues) { modelConfig.setBatchingParams("auto"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 17}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 17}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + requestData); + + auto status = instance->mockValidate(&request); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE) << status.string(); +} + +TEST_F(CAPIPredictValidation, RequestNegativeBatchValue) { + modelConfig.setBatchingParams("auto"); + preparePredictRequest(request, + {{"Input_FP32_1_224_224_3_NHWC", + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + {"Input_U8_1_3_62_62_NCHW", + std::tuple{{-1, 3, 62, 62}, ovms::Precision::U8}}, + {"Input_I64_1_6_128_128_16_NCDHW", + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + {"Input_U16_1_2_8_4_NCHW", + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + requestData); + + auto status = instance->mockValidate(&request); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE) << status.string(); +} + +TEST_F(CAPIPredictValidation, RequestNegativeShapeValues) { + modelConfig.setBatchingParams("auto"); + preparePredictRequest(request, + {{"Input_FP32_1_224_224_3_NHWC", + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + {"Input_U8_1_3_62_62_NCHW", + std::tuple{{1, 3, -62, 62}, ovms::Precision::U8}}, + {"Input_I64_1_6_128_128_16_NCDHW", + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + {"Input_U16_1_2_8_4_NCHW", + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -271,13 +305,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesTwoInputsOneWrong) { // on modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"auto\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 17}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 17}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -288,13 +322,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesAuto) { modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"auto\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 61, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 61, 62}, ovms::Precision::U8}}, 
{"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::RESHAPE_REQUIRED) << status.string(); @@ -304,13 +338,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesAutoTwoInputs) { modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"auto\", \"Input_U16_1_2_8_4_NCHW\": \"auto\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 61, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 61, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 2, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 2, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::RESHAPE_REQUIRED); @@ -320,13 +354,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesAutoNoNamedInput) { modelConfig.parseShapeParameter("auto"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 214, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 214, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 2, 61, 62}, ovms::Precision::U8}}, + std::tuple{{1, 2, 61, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 1, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 1, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 3, 2, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 3, 2, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::RESHAPE_REQUIRED); @@ -336,13 +370,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesAutoFirstDim) { modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"auto\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -359,13 +393,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesFixed) { modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"(1,3,62,62)\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 4, 63, 63}, ovms::Precision::U8}}, + std::tuple{{1, 4, 63, 63}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, 
ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE) << status.string(); @@ -375,13 +409,13 @@ TEST_F(CAPIPredictValidation, RequestWrongShapeValuesFixedFirstDim) { modelConfig.parseShapeParameter("{\"Input_U8_1_3_62_62_NCHW\": \"(1,3,62,62)\"}"); preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{2, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_BATCH_SIZE) << status.string(); @@ -391,13 +425,13 @@ TEST_F(CAPIPredictValidation, RequestIncorrectContentSize) { decrementBufferSize = 1; preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_CONTENT_SIZE) << status.string(); @@ -409,7 +443,7 @@ TEST_F(CAPIPredictValidation, RequestIncorrectInputWithNoBuffer) { ON_CALL(*instance, getInputsInfo()).WillByDefault(ReturnRef(servableInputs)); InferenceRequest request("NOT_USED", 42); - std::array shape{1, 1, 1, 1}; + std::array shape{1, 1, 1, 1}; request.addInput("Input_FP32_1_1_1_1_NHWC", OVMS_DATATYPE_FP32, shape.data(), shape.size()); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_CONTENT_SIZE) << status.string(); @@ -424,7 +458,7 @@ TEST_F(CAPIPredictValidation, RequestIncorrectContentSizeZero) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}}, requestData, decrementBufferSize); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_CONTENT_SIZE) << status.string(); @@ -433,13 +467,13 @@ TEST_F(CAPIPredictValidation, RequestIncorrectContentSizeZero) { TEST_F(CAPIPredictValidation, RequestIncorrectBufferType) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, 
ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, static_cast(999)); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_BUFFER_TYPE) << status.string(); @@ -448,13 +482,13 @@ TEST_F(CAPIPredictValidation, RequestIncorrectBufferType) { TEST_F(CAPIPredictValidation, RequestNegativeBufferType) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, static_cast(-22)); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_BUFFER_TYPE) << status.string(); @@ -463,13 +497,13 @@ TEST_F(CAPIPredictValidation, RequestNegativeBufferType) { TEST_F(CAPIPredictValidation, RequestIncorectDeviceId) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, OVMS_BUFFERTYPE_CPU, 1); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_DEVICE_ID) << status.string(); @@ -478,13 +512,13 @@ TEST_F(CAPIPredictValidation, RequestIncorectDeviceId) { TEST_F(CAPIPredictValidation, RequestIncorectBufferType) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, OVMS_BUFFERTYPE_GPU); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_BUFFER_TYPE) << status.string(); @@ -494,13 +528,13 @@ TEST_F(CAPIPredictValidation, RequestCorectDeviceId) { GTEST_SKIP() << "Enable when Other buffer types are supported"; preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, 
ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, OVMS_BUFFERTYPE_GPU, 1); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::OK) << status.string(); @@ -509,13 +543,13 @@ TEST_F(CAPIPredictValidation, RequestCorectDeviceId) { TEST_F(CAPIPredictValidation, RequestNotNullDeviceId) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize, OVMS_BUFFERTYPE_CPU, 1); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_DEVICE_ID) << status.string(); @@ -526,13 +560,13 @@ TEST_F(CAPIPredictValidation, RequestIncorrectContentSizeBatchAuto) { decrementBufferSize = 1; preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_CONTENT_SIZE) << status.string(); @@ -543,13 +577,13 @@ TEST_F(CAPIPredictValidation, RequestIncorrectContentSizeShapeAuto) { decrementBufferSize = 1; preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData, decrementBufferSize); auto status = instance->mockValidate(&request); EXPECT_EQ(status, ovms::StatusCode::INVALID_CONTENT_SIZE) << status.string(); @@ -584,7 +618,7 @@ TEST_P(CAPIPredictValidationInputTensorContent, RequestCorrectContentSizeInputTe ON_CALL(*instance, getModelConfig()).WillByDefault(ReturnRef(modelConfig)); preparePredictRequest(request, {{inputName, - std::tuple{{1, 224, 224, 3}, testedPrecision}}}, + std::tuple{{1, 224, 224, 3}, testedPrecision}}}, requestData, // data, false); auto status = instance->mockValidate(&request); @@ -594,13 +628,13 @@ 
TEST_P(CAPIPredictValidationInputTensorContent, RequestCorrectContentSizeInputTe TEST_F(CAPIPredictValidation, RequestWrongPrecision) { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::Q78}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::Q78}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, requestData); auto status = instance->mockValidate(&request); @@ -622,9 +656,9 @@ class CAPIPredictValidationArbitraryBatchPosition : public CAPIPredictValidation preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, + std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }, requestData); } @@ -640,9 +674,9 @@ TEST_F(CAPIPredictValidationArbitraryBatchPosition, RequestWrongBatchSize) { preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{224, 224, 3, 10}, ovms::Precision::FP32}}, + std::tuple{{224, 224, 3, 10}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }, requestData); auto status = instance->mockValidate(&request); @@ -655,9 +689,9 @@ TEST_F(CAPIPredictValidationArbitraryBatchPosition, RequestWrongBatchSizeAuto) { preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{224, 224, 3, 10}, ovms::Precision::FP32}}, + std::tuple{{224, 224, 3, 10}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }, requestData); @@ -670,9 +704,9 @@ TEST_F(CAPIPredictValidationArbitraryBatchPosition, RequestWrongShapeValues) { preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{221, 224, 3, 1}, ovms::Precision::FP32}}, + std::tuple{{221, 224, 3, 1}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }, requestData); auto status = instance->mockValidate(&request); @@ -685,9 +719,9 @@ TEST_F(CAPIPredictValidationArbitraryBatchPosition, RequestWrongShapeValuesAuto) preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{10, 224, 3, 1}, ovms::Precision::FP32}}, + std::tuple{{10, 224, 3, 1}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }, requestData); auto status = instance->mockValidate(&request); @@ -710,9 +744,9 @@ class CAPIPredictValidationDynamicModel : public CAPIPredictValidation { preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, + std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, }, requestData); } @@ -727,9 +761,9 @@ 
TEST_F(CAPIPredictValidationDynamicModel, RequestBatchNotInRangeFirstPosition) { preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, 16}, ovms::Precision::U8}}, + std::tuple{{101, 16}, ovms::Precision::U8}}, }, requestData); @@ -742,9 +776,9 @@ TEST_F(CAPIPredictValidationDynamicModel, RequestDimensionNotInRangeFirstPositio preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{98, 1}, ovms::Precision::U8}}, + std::tuple{{98, 1}, ovms::Precision::U8}}, }, requestData); auto status = instance->mockValidate(&request); @@ -755,9 +789,9 @@ TEST_F(CAPIPredictValidationDynamicModel, RequestBatchNotInRangeSecondPosition) preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{100, 98}, ovms::Precision::U8}}, + std::tuple{{100, 98}, ovms::Precision::U8}}, }, requestData); servableInputs["Input_U8_100:200_any_CN"] = std::make_shared("Input_U8_100:200_any_CN", ovms::Precision::U8, ovms::Shape{{100, 200}, {1, 5}}, ovms::Layout{"CN"}); @@ -769,9 +803,9 @@ TEST_F(CAPIPredictValidationDynamicModel, RequestDimensionNotInRangeSecondPositi preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{1, 223, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 223, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, 16}, ovms::Precision::U8}}, + std::tuple{{101, 16}, ovms::Precision::U8}}, }, requestData); auto status = instance->mockValidate(&request); @@ -783,9 +817,9 @@ TEST_F(CAPIPredictValidationDynamicModel, RequestDimensionInRangeWrongTensorCont preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{16, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, 16}, ovms::Precision::U8}}, + std::tuple{{101, 16}, ovms::Precision::U8}}, }, requestData, decrementBufferSize); auto status = instance->mockValidate(&request); @@ -809,11 +843,11 @@ class CAPIPredictValidationPrecision : public ::testing::TestWithParamsetPrecision(testedPrecision); + mockedInputsInfo[tensorName] = createTensorInfoCopyWithPrecision(mockedInputsInfo[tensorName], testedPrecision); preparePredictRequest(request, { {tensorName, - std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, + std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, }, requestData); auto status = ovms::request_validation_utils::validate(request, mockedInputsInfo, "dummy", ovms::model_version_t{1}); diff --git a/src/test/configs/emptyConfigWithMetrics.json b/src/test/configs/emptyConfigWithMetrics.json new file mode 100644 index 0000000000..24b030b8e4 --- /dev/null +++ b/src/test/configs/emptyConfigWithMetrics.json @@ -0,0 +1,14 @@ +{ + "monitoring": { + "metrics": { + "enable": true, + "metrics_list": [ + "ovms_current_requests", + "ovms_infer_req_active", + "ovms_requests_success", + "ovms_infer_req_queue_size"] + } + }, + "model_config_list": [ + ] +} diff --git a/src/test/custom_loader_test.cpp b/src/test/custom_loader_test.cpp index 
3b36e1145f..61f98e9c61 100644 --- a/src/test/custom_loader_test.cpp +++ b/src/test/custom_loader_test.cpp @@ -82,6 +82,28 @@ const char* custom_loader_config_model = R"({ ] })"; +// config_model_with_customloader +const char* custom_loader_config_model_relative_paths = R"({ + "custom_loader_config_list":[ + { + "config":{ + "loader_name":"sample-loader", + "library_path": "libsampleloader.so" + } + } + ], + "model_config_list":[ + { + "config":{ + "name":"dummy", + "base_path": "test_cl_models/model1", + "nireq": 1, + "custom_loader_options": {"loader_name": "sample-loader", "model_file": "dummy.xml", "bin_file": "dummy.bin"} + } + } + ] + })"; + // config_no_model_with_customloader const char* custom_loader_config_model_deleted = R"({ "custom_loader_config_list":[ @@ -454,7 +476,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigMatchingSchema) { rapidjson::Document customloaderConfigMatchingSchemaParsed; customloaderConfigMatchingSchemaParsed.Parse(customloaderConfigMatchingSchema); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -475,7 +497,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigMissingLoaderName) { rapidjson::Document customloaderConfigMissingLoaderNameParsed; customloaderConfigMissingLoaderNameParsed.Parse(customloaderConfigMissingLoaderName); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -496,7 +518,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigMissingLibraryPath) { rapidjson::Document customloaderConfigMissingLibraryPathParsed; customloaderConfigMissingLibraryPathParsed.Parse(customloaderConfigMissingLibraryPath); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLibraryPathParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLibraryPathParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -517,7 +539,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigMissingLoaderConfig) { rapidjson::Document customloaderConfigMissingLoaderConfigParsed; customloaderConfigMissingLoaderConfigParsed.Parse(customloaderConfigMissingLoaderConfig); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderConfigParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderConfigParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -538,7 +560,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigInvalidCustomLoaderConfig) { rapidjson::Document customloaderConfigInvalidCustomLoaderConfigParsed; customloaderConfigInvalidCustomLoaderConfigParsed.Parse(customloaderConfigInvalidCustomLoaderConfig); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigInvalidCustomLoaderConfigParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigInvalidCustomLoaderConfigParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -559,7 +581,7 @@ TEST_F(TestCustomLoader, 
CustomLoaderConfigMissingLoaderNameInCustomLoaderOption rapidjson::Document customloaderConfigMissingLoaderNameInCustomLoaderOptionsParsed; customloaderConfigMissingLoaderNameInCustomLoaderOptionsParsed.Parse(customloaderConfigMissingLoaderNameInCustomLoaderOptions); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderNameInCustomLoaderOptionsParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMissingLoaderNameInCustomLoaderOptionsParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -580,7 +602,7 @@ TEST_F(TestCustomLoader, CustomLoaderConfigMultiplePropertiesInCustomLoaderOptio rapidjson::Document customloaderConfigMultiplePropertiesInCustomLoaderOptionsParsed; customloaderConfigMultiplePropertiesInCustomLoaderOptionsParsed.Parse(customloaderConfigMultiplePropertiesInCustomLoaderOptions); - auto result = ovms::validateJsonAgainstSchema(customloaderConfigMultiplePropertiesInCustomLoaderOptionsParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customloaderConfigMultiplePropertiesInCustomLoaderOptionsParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -599,6 +621,27 @@ TEST_F(TestCustomLoader, CustomLoaderPrediction) { createConfigFileWithContent(configStr, fileToReload); ASSERT_EQ(manager.loadConfig(fileToReload), ovms::StatusCode::OK); + tensorflow::serving::PredictRequest request; + preparePredictRequest(request, + {{DUMMY_MODEL_INPUT_NAME, + std::tuple{{1, 10}, ovms::Precision::FP32}}}); + performPredict("dummy", 1, request); +} + +TEST_F(TestCustomLoader, CustomLoaderPredictionRelativePath) { + // Copy dummy model to temporary destination + std::filesystem::copy("/ovms/src/test/dummy", cl_model_1_path, std::filesystem::copy_options::recursive); + std::filesystem::copy("/ovms/bazel-bin/src/libsampleloader.so", cl_models_path, std::filesystem::copy_options::recursive); + + // Replace model path in the config string + std::string configStr = custom_loader_config_model_relative_paths; + configStr.replace(configStr.find("test_cl_models"), std::string("test_cl_models").size(), cl_models_path); + + // Create config file + std::string fileToReload = cl_models_path + "/cl_config.json"; + createConfigFileWithContent(configStr, fileToReload); + ASSERT_EQ(manager.loadConfig(fileToReload), ovms::StatusCode::OK); + tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, @@ -651,7 +694,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictDeletePredict) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); tensorflow::serving::PredictResponse response; ASSERT_EQ(performInferenceWithRequest(request, response), ovms::StatusCode::OK); @@ -678,7 +721,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictNewVersionPredict) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); // Copy version 1 to version 2 @@ -688,7 +731,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictNewVersionPredict) { preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); 
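
The CustomLoaderPredictionRelativePath test added above copies libsampleloader.so next to the generated config and sets "library_path" to the bare file name, so the loader library is expected to be located relative to the config file rather than through an absolute path. A minimal sketch of that kind of resolution, assuming relative paths are anchored at the config file's directory (the helper below is illustrative only, not part of the OVMS sources):

#include <filesystem>
#include <string>

// Absolute library paths pass through unchanged; relative ones are resolved
// against the directory that holds the configuration file.
std::string resolveLoaderLibraryPath(const std::string& libraryPath,
                                     const std::string& configFilePath) {
    std::filesystem::path lib{libraryPath};
    if (lib.is_absolute()) {
        return lib.string();
    }
    return (std::filesystem::path{configFilePath}.parent_path() / lib).string();
}
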
performPredict("dummy", 2, request); } @@ -708,7 +751,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictNewModelPredict) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); // Copy model1 to model2 @@ -725,7 +768,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictNewModelPredict) { preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); performPredict("dummy-new", 1, request); } @@ -746,7 +789,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictRemoveCustomLoaderOptionsPredict) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); // Replace model path in the config string @@ -776,7 +819,7 @@ TEST_F(TestCustomLoader, PredictNormalModelAddCustomLoaderOptionsPredict) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); // Replace model path in the config string @@ -807,7 +850,7 @@ TEST_F(TestCustomLoader, CustomLoaderOptionWithUnknownLibrary) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); tensorflow::serving::PredictResponse response; ASSERT_EQ(performInferenceWithRequest(request, response), ovms::StatusCode::MODEL_VERSION_MISSING); } @@ -825,7 +868,7 @@ TEST_F(TestCustomLoader, CustomLoaderWithMissingModelFiles) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); tensorflow::serving::PredictResponse response; ASSERT_EQ(performInferenceWithRequest(request, response), ovms::StatusCode::MODEL_VERSION_MISSING); } @@ -897,7 +940,7 @@ TEST_F(TestCustomLoader, CustomLoaderPredictionUsingManyCustomLoaders) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy-a", 1, request); performPredict("dummy-b", 1, request); @@ -1041,7 +1084,7 @@ TEST_F(TestCustomLoader, CustomLoaderMultipleLoaderWithSameLoaderName) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); performPredict("dummy", 1, request); } diff --git a/src/test/custom_node_buffersqueue_test.cpp b/src/test/custom_node_buffersqueue_test.cpp index 99dcf20e12..0eb4d3a390 100644 --- a/src/test/custom_node_buffersqueue_test.cpp +++ b/src/test/custom_node_buffersqueue_test.cpp @@ -51,11 +51,6 @@ TEST(CustomNodeBuffersQueue, GetAllBuffers) { } } -void getBufferAndReturn(BuffersQueue& buffersQueue) { - void* buffer = buffersQueue.getBuffer(); - ASSERT_NE(nullptr, buffer) << "Failed to get buffer"; -} - TEST(CustomNodeBuffersQueue, 
GetAllBuffersThenNullptrForNextRequest) { const std::string content{"abc"}; size_t buffersCount = 1; diff --git a/src/test/custom_node_output_allocator_test.cpp b/src/test/custom_node_output_allocator_test.cpp index 6ebee111a2..b2a4e07a4c 100644 --- a/src/test/custom_node_output_allocator_test.cpp +++ b/src/test/custom_node_output_allocator_test.cpp @@ -60,21 +60,32 @@ int NodeLibraryCheckingReleaseCalled::release(void* ptr, void* customNodeLibrary bool NodeLibraryCheckingReleaseCalled::releaseBufferCalled = false; -class CustomNodeOutputAllocatorCheckingFreeCalled : public CustomNodeOutputAllocator { - bool freeCalled = false; +class CustomNodeOutputAllocatorCheckingFreeCalled { + // Since test is copying/moving the allocator to ov::Tensor class + // we need to use counter to ensure how many times deallocate was called + std::shared_ptr freeCallCount{std::make_shared(0)}; + std::shared_ptr allocImpl; public: CustomNodeOutputAllocatorCheckingFreeCalled(struct CustomNodeTensor tensor, NodeLibrary nodeLibrary, void* customNodeLibraryInternalManager) : - CustomNodeOutputAllocator(tensor, nodeLibrary, customNodeLibraryInternalManager) { + allocImpl(std::make_shared(tensor, nodeLibrary, customNodeLibraryInternalManager)) { } ~CustomNodeOutputAllocatorCheckingFreeCalled() { - EXPECT_TRUE(freeCalled); + EXPECT_EQ(*freeCallCount, 1); } - void deallocate(void* handle, const size_t bytes, size_t alignment) override { - CustomNodeOutputAllocator::deallocate(handle, bytes, alignment); - freeCalled = true; + void* allocate(const size_t bytes, const size_t alignment) { + return allocImpl->allocate(bytes, alignment); + } + + void deallocate(void* handle, const size_t bytes, size_t alignment) { + allocImpl->deallocate(handle, bytes, alignment); + (*freeCallCount)++; + } + + bool is_equal(const CustomNodeOutputAllocatorCheckingFreeCalled& other) const { + return allocImpl->is_equal(*other.allocImpl); } }; @@ -96,8 +107,7 @@ TEST(CustomNodeOutputAllocator, TensorDeallocationCallsReleaseBuffer) { NodeLibraryCheckingReleaseCalled::getOutputsInfo, NodeLibraryCheckingReleaseCalled::release}; void* customNodeLibraryInternalManager = nullptr; - std::shared_ptr customNodeOutputAllocator = std::make_shared(tensor, library, customNodeLibraryInternalManager); - ov::Allocator alloc(customNodeOutputAllocator); + CustomNodeOutputAllocatorCheckingFreeCalled alloc(tensor, library, customNodeLibraryInternalManager); EXPECT_FALSE(NodeLibraryCheckingReleaseCalled::releaseBufferCalled); { auto elemType = ovmsPrecisionToIE2Precision(Precision::FP32); @@ -149,8 +159,7 @@ TEST(CustomNodeOutputAllocator, TensorReturnsCorrectPointer) { getOutputsInfo, release}; void* customNodeLibraryInternalManager = nullptr; - std::shared_ptr customNodeOutputAllocator = std::make_shared(tensor, library, customNodeLibraryInternalManager); - ov::Allocator alloc(customNodeOutputAllocator); + CustomNodeOutputAllocator alloc(tensor, library, customNodeLibraryInternalManager); auto elemType = ovmsPrecisionToIE2Precision(Precision::FP32); shape_t shape{10}; auto tensorIE2 = std::make_shared(elemType, shape, alloc); diff --git a/src/test/deserialization_tests.cpp b/src/test/deserialization_tests.cpp index a24430c692..c723186357 100644 --- a/src/test/deserialization_tests.cpp +++ b/src/test/deserialization_tests.cpp @@ -37,8 +37,6 @@ #include "../tfs_frontend/tfs_utils.hpp" #include "test_utils.hpp" -#include - using TFTensorProto = tensorflow::TensorProto; using TFPredictRequest = tensorflow::serving::PredictRequest; @@ -102,7 +100,7 @@ class 
CAPIPredict : public ::testing::TestWithParam { } void SetUpTensorProto(OVMS_DataType dataType) { - std::array shape{1, DUMMY_MODEL_INPUT_SIZE}; + std::array shape{1, DUMMY_MODEL_INPUT_SIZE}; tensorCapi = std::make_unique(dataType, shape.data(), shape.size()); @@ -125,7 +123,7 @@ class CAPIPredictRequest : public CAPIPredict { protected: InferenceRequest request{"dummy", 1}; static const std::string DATA; - static constexpr std::array SHAPE{1, 10}; + static constexpr std::array SHAPE{1, 10}; void SetUp() { CAPIPredict::SetUp(); request.addInput(DUMMY_MODEL_INPUT_NAME, @@ -184,7 +182,7 @@ class GRPCPredictRequestNegative : public GRPCPredictRequest {}; TEST_P(GRPCPredictRequestNegative, ShouldReturnDeserializationErrorForPrecision) { ovms::Precision testedPrecision = GetParam(); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::InferRequest inferRequest; InputSink inputSink(inferRequest); auto status = deserializePredictRequest(request, tensorMap, inputSink, isPipeline); @@ -196,7 +194,7 @@ TEST_P(GRPCPredictRequestNegative, ShouldReturnDeserializationErrorForPrecision) TEST_P(GRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetTensorException) { ovms::Precision testedPrecision = GetParam(); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::InferRequest inferRequest; InputSink inputSink(inferRequest); auto status = deserializePredictRequest(request, tensorMap, inputSink, isPipeline); @@ -208,12 +206,12 @@ class MockTensorProtoDeserializatorThrowingInferenceEngine { MOCK_METHOD(ov::Tensor, deserializeTensorProto, (const tensorflow::TensorProto&, - const std::shared_ptr&)); + const std::shared_ptr&)); MOCK_METHOD(ov::Tensor, deserializeTensorProto, (const ::KFSRequest::InferInputTensor&, - const std::shared_ptr&, + const std::shared_ptr&, const std::string* buffer)); }; @@ -223,13 +221,13 @@ class MockTensorProtoDeserializator { static MockTensorProtoDeserializatorThrowingInferenceEngine* mock; static ov::Tensor deserializeTensorProto( const tensorflow::TensorProto& requestInput, - const std::shared_ptr& tensorInfo) { + const std::shared_ptr& tensorInfo) { return mock->deserializeTensorProto(requestInput, tensorInfo); } static ov::Tensor deserializeTensorProto( const ::KFSRequest::InferInputTensor& requestInput, - const std::shared_ptr& tensorInfo, + const std::shared_ptr& tensorInfo, const std::string* buffer) { return mock->deserializeTensorProto(requestInput, tensorInfo, buffer); } @@ -237,6 +235,9 @@ class MockTensorProtoDeserializator { MockTensorProtoDeserializatorThrowingInferenceEngine* MockTensorProtoDeserializator::mock = nullptr; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + TEST_F(GRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetTensorException2) { ov::Core ieCore; std::shared_ptr model = ieCore.read_model(std::filesystem::current_path().u8string() + "/src/test/dummy/1/dummy.xml"); @@ -255,6 +256,8 @@ TEST_F(GRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetTensorE EXPECT_EQ(status, ovms::StatusCode::OV_INTERNAL_DESERIALIZATION_ERROR) << status.string(); } +#pragma GCC diagnostic pop + TEST_F(GRPCPredictRequest, ShouldSuccessForSupportedPrecision) { ov::Core ieCore; std::shared_ptr model = ieCore.read_model(std::filesystem::current_path().u8string() + 
"/src/test/dummy/1/dummy.xml"); @@ -267,7 +270,7 @@ TEST_F(GRPCPredictRequest, ShouldSuccessForSupportedPrecision) { TEST_P(DeserializeTFTensorProtoNegative, ShouldReturnNullptrForPrecision) { ovms::Precision testedPrecision = GetParam(); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(tensorProto, tensorMap[tensorName]); EXPECT_FALSE((bool)tensor) << "Unsupported OVMS precision:" << toString(testedPrecision) @@ -276,7 +279,7 @@ TEST_P(DeserializeTFTensorProtoNegative, ShouldReturnNullptrForPrecision) { TEST_P(DeserializeCAPITensorProtoNegative, ShouldReturnNullptrForPrecision) { ovms::Precision testedPrecision = GetParam(); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(*tensorCapi, tensorMap[tensorName]); EXPECT_FALSE((bool)tensor) << "Unsupported OVMS precision:" << toString(testedPrecision) @@ -286,7 +289,7 @@ TEST_P(DeserializeCAPITensorProtoNegative, ShouldReturnNullptrForPrecision) { TEST_P(DeserializeTFTensorProto, ShouldReturnValidTensor) { ovms::Precision testedPrecision = GetParam(); SetUpTensorProto(getPrecisionAsDataType(testedPrecision)); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(tensorProto, tensorMap[tensorName]); EXPECT_TRUE((bool)tensor) << "Supported OVMS precision:" << toString(testedPrecision) @@ -296,7 +299,7 @@ TEST_P(DeserializeTFTensorProto, ShouldReturnValidTensor) { TEST_P(DeserializeCAPITensor, ShouldReturnValidTensor) { ovms::Precision testedPrecision = GetParam(); SetUpTensorProto(getPrecisionAsOVMSDataType(testedPrecision)); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(*tensorCapi, tensorMap[tensorName]); EXPECT_TRUE((bool)tensor) << "Supported OVMS precision:" << toString(testedPrecision) @@ -420,7 +423,7 @@ class DeserializeKFSTensorProtoNegative : public KserveGRPCPredict {}; TEST_P(DeserializeKFSTensorProtoNegative, ShouldReturnNullptrForPrecision) { auto [testedPrecision, getInputFromRawInputContents] = GetParam(); std::string* bufferPtr = (getInputFromRawInputContents ? 
&buffer : nullptr); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(tensorProto, tensorMap[tensorName], bufferPtr); EXPECT_FALSE((bool)tensor) << "Unsupported OVMS precision:" << toString(testedPrecision) @@ -434,7 +437,7 @@ TEST_P(DeserializeKFSTensorProto, ShouldReturnValidTensor) { GTEST_SKIP() << "Not supported"; } SetUpTensorProto(TensorInfo::getPrecisionAsString(testedPrecision), getInputFromRawInputContents); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::Tensor tensor = deserializeTensorProto(tensorProto, tensorMap[tensorName], bufferPtr); EXPECT_TRUE((bool)tensor) << "Supported OVMS precision:" << toString(testedPrecision) @@ -495,7 +498,7 @@ class KserveGRPCPredictRequestNegative : public KserveGRPCPredictRequest { TEST_P(KserveGRPCPredictRequestNegative, ShouldReturnDeserializationErrorForPrecision) { auto [testedPrecision, getInputFromRawInputContents] = GetParam(); - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::InferRequest inferRequest; InputSink inputSink(inferRequest); auto status = deserializePredictRequest(request, tensorMap, inputSink, isPipeline); @@ -509,7 +512,7 @@ TEST_P(KserveGRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetT auto [testedPrecision, getInputFromRawInputContents] = GetParam(); if (!getInputFromRawInputContents) GTEST_SKIP() << "test setup not implemented yet"; - tensorMap[tensorName]->setPrecision(testedPrecision); + tensorMap[tensorName] = createTensorInfoCopyWithPrecision(tensorMap[tensorName], testedPrecision); ov::InferRequest inferRequest; InputSink inputSink(inferRequest); auto status = deserializePredictRequest(request, tensorMap, inputSink, isPipeline); @@ -520,6 +523,9 @@ std::string toString(const std::pair& pair) { return toString(pair.first) + "_" + (pair.second ? 
"BufferInRequestRawInputContents" : "BufferInRequestTensorInputContents"); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + TEST_F(KserveGRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetTensorException2) { ov::Core ieCore; std::shared_ptr model = ieCore.read_model(std::filesystem::current_path().u8string() + "/src/test/dummy/1/dummy.xml"); @@ -538,6 +544,8 @@ TEST_F(KserveGRPCPredictRequestNegative, ShouldReturnDeserializationErrorForSetT EXPECT_EQ(status, ovms::StatusCode::OV_INTERNAL_DESERIALIZATION_ERROR) << status.string(); } +#pragma GCC diagnostic pop + std::vector> KserveGRPCPredictRequestNegativeParams = cartesianProduct(UNSUPPORTED_KFS_INPUT_PRECISIONS, {true, false}); INSTANTIATE_TEST_SUITE_P( diff --git a/src/test/dummyUppercase/1/dummy.bin b/src/test/dummyUppercase/1/dummy.bin new file mode 100644 index 0000000000..9c12e8adf9 Binary files /dev/null and b/src/test/dummyUppercase/1/dummy.bin differ diff --git a/src/test/dummyUppercase/1/dummy.xml b/src/test/dummyUppercase/1/dummy.xml new file mode 100644 index 0000000000..953f546df2 --- /dev/null +++ b/src/test/dummyUppercase/1/dummy.xml @@ -0,0 +1,99 @@ + + + + + + + + 1 + 10 + + + + + + + + 1 + 1 + + + + + + + 1 + 10 + + + 1 + 1 + + + + + 1 + 10 + + + + + + + 1 + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/dummy_saved_model/1/saved_model.pb b/src/test/dummy_saved_model/1/saved_model.pb new file mode 100644 index 0000000000..84025e6c04 Binary files /dev/null and b/src/test/dummy_saved_model/1/saved_model.pb differ diff --git a/src/test/ensemble_config_change_stress.cpp b/src/test/ensemble_config_change_stress.cpp index b727cf3ef6..e1331654ac 100644 --- a/src/test/ensemble_config_change_stress.cpp +++ b/src/test/ensemble_config_change_stress.cpp @@ -1,6 +1,5 @@ - //***************************************************************************** -// Copyright 2020 Intel Corporation +// Copyright 2020-2023 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -24,14 +23,22 @@ #include "../dags/pipeline_factory.hpp" #include "../dags/pipelinedefinition.hpp" #include "../get_model_metadata_impl.hpp" +#include "../kfs_frontend/kfs_utils.hpp" #include "../localfilesystem.hpp" #include "../logging.hpp" +#if (MEDIAPIPE_DISABLE == 0) +#include "../mediapipe_internal/mediapipegraphexecutor.hpp" +#endif #include "../model_service.hpp" #include "../modelconfig.hpp" #include "../modelinstance.hpp" #include "../prediction_service_utils.hpp" +#include "../servablemanagermodule.hpp" +#include "../server.hpp" #include "../status.hpp" #include "../stringutils.hpp" +#include "../tfs_frontend/tfs_utils.hpp" +#include "c_api_test_utils.hpp" #include "test_utils.hpp" using namespace ovms; @@ -43,6 +50,11 @@ using testing::Return; static const std::string PIPELINE_1_DUMMY_NAME = "pipeline1Dummy"; +enum class SERVABLE_TYPE { + DAG, + MEDIAPIPE +}; + std::string createStressTestPipelineOneDummyConfig() { return R"( { @@ -995,6 +1007,121 @@ static const char* stressTestOneDummyConfig = R"( ] })"; +const std::string basicMediapipeConfig = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +const std::string basicMediapipeConfigWithAddedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + }, + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt" + } + ] +})"; + +const std::string basicMediapipeConfigWithRemovedGraph = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + ] +})"; + +const std::string basicMediapipeConfigWithRemovedModel = R"({ + "model_config_list": [ + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +const std::string basicMediapipeConfigWithReloadedModel = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "nireq": 47 + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt" + } + ] +})"; + +const std::string basicMediapipeConfigWithNewGraphPath = R"({ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"pipeline1Dummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull_dummyinputnames2.pbtxt" + } + ] +})"; +#if (MEDIAPIPE_DISABLE == 0) +template +void mediaexec(std::shared_ptr& executorPtr, ovms::ModelManager& manager, Request&, Response&, ovms::Status& status) { + throw std::string("Unsupported"); +} +template <> +void mediaexec(std::shared_ptr& executorPtr, ovms::ModelManager& manager, KFSRequest& request, KFSResponse& response, ovms::Status& status) { + ServableMetricReporter* ptr{nullptr}; + status = executorPtr->infer(&request, + &response, + ovms::ExecutionContext( + ovms::ExecutionContext::Interface::GRPC, + ovms::ExecutionContext::Method::Predict), + ptr); +} +template +void mediacreate(std::shared_ptr& 
executorPtr, ovms::ModelManager& manager, Request&, Response&, ovms::Status& status) { + throw std::string("Unsupported"); +} +template <> +void mediacreate(std::shared_ptr& executorPtr, ovms::ModelManager& manager, KFSRequest& request, KFSResponse& response, ovms::Status& status) { + status = manager.createPipeline(executorPtr, request.model_name(), &request, &response); +} +#endif class StressPipelineConfigChanges : public TestWithTempDir { protected: const uint loadThreadCount = 20; @@ -1013,7 +1140,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { // producess highest results const std::vector requestData{1.1, 2., 3., 7., 5., 6., 4., 9., 10., 8.}; - ConstructorEnabledModelManager manager; + ModelManager* manager; public: virtual std::string getServableName() { @@ -1024,18 +1151,48 @@ class StressPipelineConfigChanges : public TestWithTempDir { const std::string modelPathToReplace{"/ovms/src/test/dummy"}; auto it = ovmsConfig.find(modelPathToReplace); if (it != std::string::npos) { - ovmsConfig.replace(ovmsConfig.find(modelPathToReplace), modelPathToReplace.size(), modelPath); + ovmsConfig.replace(it, modelPathToReplace.size(), modelPath); } configFilePath = directoryPath + "/ovms_config.json"; } - void SetUp() override { + // we setup the OVMS so that it does not have any models loaded but also prepare the fixture to have ovmsConfig & configFilePath set up + void SetUpServerInstance(const std::string& initialConfigContent) { TestWithTempDir::SetUp(); - char* n_argv[] = {(char*)"ovms", (char*)"--config_path", (char*)"/unused", (char*)"--rest_port", (char*)"8080"}; // Workaround to have rest_port parsed in order to enable metrics - int arg_count = 5; - ovms::Config::instance().parse(arg_count, n_argv); + std::string port = "9178"; + std::string restPort = "9178"; modelPath = directoryPath + "/dummy/"; - SetUpConfig(createStressTestPipelineOneDummyConfig()); + SetUpConfig(initialConfigContent); std::filesystem::copy("/ovms/src/test/dummy", modelPath, std::filesystem::copy_options::recursive); + + OVMS_ServerSettings* serverSettings = nullptr; + OVMS_ModelsSettings* modelsSettings = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsNew(&serverSettings)); + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsNew(&modelsSettings)); + randomizePorts(port, restPort); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetGrpcPort(serverSettings, std::stoi(port))); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetRestPort(serverSettings, std::stoi(restPort))); // required for metrics + // ideally we would want to have emptyConfigWithMetrics + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsSetConfigPath(modelsSettings, "/ovms/src/test/configs/emptyConfigWithMetrics.json")); // FIXME the content of config json is irrelevant - we just need server to be ready for C-API use in mediapipe + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetFileSystemPollWaitSeconds(serverSettings, 0)); // set to 0 to reload only through test and avoid races + OVMS_Server* cserver; + ASSERT_CAPI_STATUS_NULL(OVMS_ServerNew(&cserver)); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, modelsSettings)); + OVMS_ModelsSettingsDelete(modelsSettings); + OVMS_ServerSettingsDelete(serverSettings); + ovms::Server& server = ovms::Server::instance(); + manager = &(dynamic_cast(server.getModule(SERVABLE_MANAGER_MODULE_NAME))->getServableManager()); + } + void SetUp() override { + SetUpServerInstance(createStressTestPipelineOneDummyConfig()); + } + void TearDown() override { + OVMS_Server* cserver; 
+ ASSERT_CAPI_STATUS_NULL(OVMS_ServerNew(&cserver)); + ovms::Server& server = ovms::Server::instance(); + manager->join(); + server.setShutdownRequest(1); + OVMS_ServerDelete(cserver); + server.setShutdownRequest(0); } void defaultVersionRemove() { SPDLOG_INFO("{} start", __FUNCTION__); @@ -1125,6 +1282,36 @@ class StressPipelineConfigChanges : public TestWithTempDir { createConfigFileWithContent(ovmsConfig, configFilePath); SPDLOG_INFO("{} end", __FUNCTION__); } + void addNewMediapipeGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeConfigWithAddedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void removeMediapipeGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeConfigWithRemovedGraph); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void removeMediapipeGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeConfigWithRemovedModel); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeGraphUsedModel() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeConfigWithReloadedModel); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } + void reloadMediapipeGraph() { + SPDLOG_INFO("{} start", __FUNCTION__); + SetUpConfig(basicMediapipeConfigWithNewGraphPath); + createConfigFileWithContent(ovmsConfig, configFilePath); + SPDLOG_INFO("{} end", __FUNCTION__); + } void checkMetricGreaterThan(const std::string& metricName, double value, std::string& metricOutput, bool& result) { ASSERT_THAT(metricOutput, ::testing::HasSubstr(metricName + std::string{"{name=\"dummy\",version=\"1\"} "})) << "cannot find dummys " << metricName << " metric\n" << metricOutput; @@ -1148,7 +1335,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { << metricOutput; } void checkActiveNireqSmallerThanTotal() { - std::string metricOutput = manager.getMetricRegistry()->collect(); + std::string metricOutput = manager->getMetricRegistry()->collect(); std::regex findNireqTotalRgx(std::string{".*"} + METRIC_NAME_INFER_REQ_QUEUE_SIZE + std::string{"\\{name=\"dummy\",version=\"1\"\\} (.*)\n.*"}); std::regex findNireqActiveRgx(std::string{".*"} + METRIC_NAME_INFER_REQ_ACTIVE + std::string{"\\{name=\"dummy\",version=\"1\"\\} (.*)\n.*"}); std::smatch match; @@ -1167,7 +1354,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { bool current_requests_pass = false, infer_req_active_pass = false; int retries = 3; for (int i = 0; i < retries; i++) { - std::string metricOutput = manager.getMetricRegistry()->collect(); + std::string metricOutput = manager->getMetricRegistry()->collect(); checkMetricGreaterThan(METRIC_NAME_CURRENT_REQUESTS, 0, metricOutput, current_requests_pass); checkMetricGreaterThan(METRIC_NAME_INFER_REQ_ACTIVE, 0, metricOutput, infer_req_active_pass); if (current_requests_pass && infer_req_active_pass) @@ -1191,7 +1378,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { std::set requiredLoadResults, std::set allowedLoadResults) { createConfigFileWithContent(ovmsConfig, configFilePath); - auto status = manager.loadConfig(configFilePath); + auto status = manager->loadConfig(configFilePath); ASSERT_TRUE(status.ok()); // setup helper variables for managing threads @@ -1225,7 +1412,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { 
i]() { ((*this).*triggerLoadInALoop)(futureStartSignals[i], futureStopSignals[i], - this->manager, + *(this->manager), requiredLoadResults, allowedLoadResults, createPipelineRetCodesCounters); @@ -1237,9 +1424,9 @@ class StressPipelineConfigChanges : public TestWithTempDir { std::this_thread::sleep_for(std::chrono::milliseconds(beforeConfigChangeLoadTimeMs)); ((*this).*configChangeOperation)(); if (reloadWholeConfig) { - manager.loadConfig(configFilePath); + manager->loadConfig(configFilePath); } else { - manager.updateConfigurationWithoutConfigFile(); + manager->updateConfigurationWithoutConfigFile(); } // wait to work strictly on config operations after change std::this_thread::sleep_for(std::chrono::milliseconds(afterConfigChangeLoadTimeMs)); @@ -1260,7 +1447,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { << " was not allowed in test but occured during load"; } } - bool isMetadataResponseCorrect(tensorflow::serving::GetModelMetadataResponse& response) { + bool isMetadataResponseCorrect(tensorflow::serving::GetModelMetadataResponse& response, SERVABLE_TYPE servableType) { tensorflow::serving::SignatureDefMap def; EXPECT_EQ(response.model_spec().name(), pipelineName); EXPECT_TRUE(response.model_spec().has_version()); @@ -1298,9 +1485,18 @@ class StressPipelineConfigChanges : public TestWithTempDir { if (!inputTypeCorrect) { return false; } - bool inputShapeCorrect{isShapeTheSame( + bool inputShapeCorrect = false; + std::vector expectedShape; + if (servableType == SERVABLE_TYPE::DAG) { + expectedShape = std::vector{1, 10}; + } else if (servableType == SERVABLE_TYPE::MEDIAPIPE) { + expectedShape = std::vector{}; + } else { + EXPECT_TRUE(false) << "Unsupported checks"; + } + inputShapeCorrect = isShapeTheSame( inputs.at(pipelineInputName.c_str()).tensor_shape(), - {1, 10})}; + std::move(expectedShape)); EXPECT_TRUE(inputShapeCorrect); if (!inputShapeCorrect) { return false; @@ -1330,6 +1526,10 @@ class StressPipelineConfigChanges : public TestWithTempDir { } return true; } + template < + typename RequestType = tensorflow::serving::GetModelMetadataRequest, + typename ResponseType = tensorflow::serving::GetModelMetadataResponse, + typename ServableType = ovms::Pipeline> void triggerGetPipelineMetadataInALoop( std::future& startSignal, std::future& stopSignal, @@ -1358,7 +1558,13 @@ class StressPipelineConfigChanges : public TestWithTempDir { continue; } // Check response if correct - EXPECT_TRUE(isMetadataResponseCorrect(response)); + SERVABLE_TYPE servableType = SERVABLE_TYPE::DAG; +#if (MEDIAPIPE_DISABLE == 0) + if (typeid(ServableType) == typeid(MediapipeGraphExecutor)) { + servableType = SERVABLE_TYPE::MEDIAPIPE; + } +#endif + EXPECT_TRUE(isMetadataResponseCorrect(response, servableType)); if (::testing::Test::HasFailure()) { SPDLOG_INFO("Earlier fail detected. 
Stopping execution"); break; @@ -1402,22 +1608,34 @@ class StressPipelineConfigChanges : public TestWithTempDir { } virtual inputs_info_t getExpectedInputsInfo() { return {{pipelineInputName, - std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, ovms::Precision::FP32}}}; + std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, ovms::Precision::FP32}}}; } - virtual tensorflow::serving::PredictRequest preparePipelinePredictRequest() { + virtual tensorflow::serving::PredictRequest preparePipelinePredictRequest(tensorflow::serving::PredictRequest) { tensorflow::serving::PredictRequest request; preparePredictRequest(request, getExpectedInputsInfo()); auto& input = (*request.mutable_inputs())[pipelineInputName]; input.mutable_tensor_content()->assign((char*)requestData.data(), requestData.size() * sizeof(float)); return request; } + virtual KFSRequest preparePipelinePredictRequest(KFSRequest&) { + KFSRequest request; + preparePredictRequest(request, getExpectedInputsInfo(), requestData); + request.set_model_name(PIPELINE_1_DUMMY_NAME); + return request; + } virtual void checkPipelineResponse(const std::string& pipelineOutputName, - tensorflow::serving::PredictRequest& request, - tensorflow::serving::PredictResponse& response) { + TFSPredictRequest& request, + TFSPredictResponse& response) { checkDummyResponse(pipelineOutputName, requestData, request, response, 1); } + virtual void checkPipelineResponse(const std::string& pipelineOutputName, + KFSRequest& request, + KFSResponse& response) { + checkDummyResponse(pipelineOutputName, requestData, request, response, 1, 1, pipelineName); + } + template void triggerPredictInALoop( std::future& startSignal, std::future& stopSignal, @@ -1428,17 +1646,32 @@ class StressPipelineConfigChanges : public TestWithTempDir { startSignal.get(); // stressIterationsCounter is additional safety measure auto stressIterationsCounter = stressIterationsLimit; + bool breakLoop = false; while (stressIterationsCounter-- > 0) { auto futureWaitResult = stopSignal.wait_for(std::chrono::milliseconds(0)); - if (futureWaitResult == std::future_status::ready) { - SPDLOG_INFO("Got stop signal. Ending Load"); + if (true == breakLoop) { + SPDLOG_INFO("Ending Load"); break; } + if (futureWaitResult == std::future_status::ready) { + SPDLOG_INFO("Got stop signal. 
Triggering last request"); + breakLoop = true; + } std::unique_ptr pipelinePtr; - - tensorflow::serving::PredictRequest request = preparePipelinePredictRequest(); - tensorflow::serving::PredictResponse response; - auto createPipelineStatus = manager.createPipeline(pipelinePtr, pipelineName, &request, &response); +#if (MEDIAPIPE_DISABLE == 0) + std::shared_ptr executorPtr; +#endif + ResponseType response; + RequestType request2; // DIRTY HACK WARNING + RequestType request = preparePipelinePredictRequest(request2); + ovms::Status createPipelineStatus = StatusCode::UNKNOWN_ERROR; + if (typeid(ServableType) == typeid(ovms::Pipeline)) { + createPipelineStatus = manager.createPipeline(pipelinePtr, pipelineName, &request, &response); +#if (MEDIAPIPE_DISABLE == 0) + } else if (typeid(ServableType) == typeid(ovms::MediapipeGraphExecutor)) { + mediacreate(executorPtr, manager, request, response, createPipelineStatus); +#endif + } // we need to make sure that expected status happened and still accept // some that could happen but we may not hit them EXPECT_TRUE((requiredLoadResults.find(createPipelineStatus.getCode()) != requiredLoadResults.end()) || @@ -1450,9 +1683,15 @@ class StressPipelineConfigChanges : public TestWithTempDir { } ovms::Status executePipelineStatus = StatusCode::UNKNOWN_ERROR; - executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext( - ovms::ExecutionContext::Interface::GRPC, - ovms::ExecutionContext::Method::Predict)); + if (typeid(ServableType) == typeid(ovms::Pipeline)) { + executePipelineStatus = pipelinePtr->execute(ovms::ExecutionContext( + ovms::ExecutionContext::Interface::GRPC, + ovms::ExecutionContext::Method::Predict)); +#if (MEDIAPIPE_DISABLE == 0) + } else if (typeid(ServableType) == typeid(ovms::MediapipeGraphExecutor)) { + mediaexec(executorPtr, manager, request, response, executePipelineStatus); +#endif + } createPipelineRetCodesCounters[executePipelineStatus.getCode()]++; EXPECT_TRUE((requiredLoadResults.find(executePipelineStatus.getCode()) != requiredLoadResults.end()) || (allowedLoadResults.find(executePipelineStatus.getCode()) != allowedLoadResults.end())) @@ -1477,7 +1716,7 @@ class StressPipelineConfigChanges : public TestWithTempDir { } }; -static const char* initialClearConfig = R"( +static const std::string initialClearConfig = R"( { "model_config_list": [ ] @@ -1493,10 +1732,7 @@ class StressModelConfigChanges : public StressPipelineConfigChanges { return modelName; } void SetUp() override { - TestWithTempDir::SetUp(); - modelPath = directoryPath + "/dummy/"; - SetUpConfig(initialClearConfig); - std::filesystem::copy("/ovms/src/test/dummy", modelPath, std::filesystem::copy_options::recursive); + SetUpServerInstance(initialClearConfig); } }; @@ -1505,7 +1741,19 @@ TEST_F(StressPipelineConfigChanges, AddNewVersionDuringPredictLoad) { std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::defaultVersionAdd, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressPipelineConfigChanges, KFSAddNewVersionDuringPredictLoad) { + bool performWholeConfigReload = false; // we just need to have all model versions rechecked + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation + std::set allowedLoadResults = {}; + performStressTest( + // XYZ 
&StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::defaultVersionAdd, performWholeConfigReload, requiredLoadResults, @@ -1516,7 +1764,7 @@ TEST_F(StressPipelineConfigChanges, GetMetricsDuringLoad) { std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::testCurrentRequestsMetric, performWholeConfigReload, requiredLoadResults, @@ -1532,7 +1780,7 @@ TEST_F(StressPipelineConfigChanges, RemoveDefaultVersionDuringPredictLoad) { // all model versions different than removing model from config bool performWholeConfigReload = true; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::defaultVersionRemove, performWholeConfigReload, requiredLoadResults, @@ -1543,7 +1791,7 @@ TEST_F(StressPipelineConfigChanges, ChangeToShapeAutoDuringPredictLoad) { std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::changeToAutoShape, performWholeConfigReload, requiredLoadResults, @@ -1555,7 +1803,7 @@ TEST_F(StressPipelineConfigChanges, RemovePipelineDefinitionDuringPredictLoad) { StatusCode::PIPELINE_DEFINITION_NOT_LOADED_ANYMORE}; // we expect to stop creating pipelines std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::removePipelineDefinition, performWholeConfigReload, requiredLoadResults, @@ -1566,7 +1814,7 @@ TEST_F(StressPipelineConfigChanges, ChangedPipelineConnectionNameDuringPredictLo std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::changeConnectionName, performWholeConfigReload, requiredLoadResults, @@ -1577,7 +1825,7 @@ TEST_F(StressPipelineConfigChanges, AddedNewPipelineDuringPredictLoad) { std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::addNewPipeline, performWholeConfigReload, requiredLoadResults, @@ -1593,7 +1841,7 @@ TEST_F(StressPipelineConfigChanges, RetireSpecificVersionUsedDuringPredictLoad) StatusCode::MODEL_VERSION_NOT_LOADED_ANYMORE}; // version is retired but pipeline not invalidated yet std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::retireSpecificVersionUsed, performWholeConfigReload, requiredLoadResults, @@ -1703,7 +1951,7 @@ TEST_F(StressPipelineCustomNodesWithPreallocatedBuffersConfigChanges, RemoveCust 
StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // we hit when all config changes finish to propagate std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::removePreallocatedCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1716,7 +1964,7 @@ TEST_F(StressPipelineCustomNodesWithPreallocatedBuffersConfigChanges, RenameCust std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // might hit reload phase performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::renamePreallocatedCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1729,7 +1977,7 @@ TEST_F(StressPipelineCustomNodesWithPreallocatedBuffersConfigChanges, ChangePara std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // might hit reload phase performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::changeParamPreallocatedCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1742,7 +1990,7 @@ TEST_F(StressPipelineCustomNodesWithPreallocatedBuffersConfigChanges, ReduceQueu std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // might hit reload phase performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::reduceQueueSizePreallocatedCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1755,7 +2003,7 @@ TEST_F(StressPipelineCustomNodesWithPreallocatedBuffersConfigChanges, IncreaseQu std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // might hit reload phase performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::increaseQueueSizePreallocatedCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1834,7 +2082,7 @@ class StressPipelineCustomNodesConfigChanges : public StressPipelineConfigChange const std::string pipelineFactorsInputName{"pipeline_factors"}; public: - tensorflow::serving::PredictRequest preparePipelinePredictRequest() override { + tensorflow::serving::PredictRequest preparePipelinePredictRequest(tensorflow::serving::PredictRequest) override { tensorflow::serving::PredictRequest request; preparePredictRequest(request, getExpectedInputsInfo()); auto& input = (*request.mutable_inputs())[pipelineInputName]; @@ -1845,9 +2093,9 @@ class StressPipelineCustomNodesConfigChanges : public StressPipelineConfigChange } inputs_info_t getExpectedInputsInfo() override { return {{pipelineInputName, - std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, ovms::Precision::FP32}}, + std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, ovms::Precision::FP32}}, {pipelineFactorsInputName, - std::tuple{{1, differentOpsFactorsInputSize}, ovms::Precision::FP32}}}; + std::tuple{{1, 
differentOpsFactorsInputSize}, ovms::Precision::FP32}}}; } void checkPipelineResponse(const std::string& pipelineOutputName, tensorflow::serving::PredictRequest& request, @@ -1866,7 +2114,7 @@ TEST_F(StressPipelineCustomNodesConfigChanges, RemoveCustomLibraryDuringPredictL StatusCode::PIPELINE_DEFINITION_NOT_LOADED_YET}; // we hit when all config changes finish to propagate std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::removeCustomLibraryUsed, performWholeConfigReload, requiredLoadResults, @@ -1880,7 +2128,7 @@ TEST_F(StressPipelineCustomNodesConfigChanges, ChangeCustomLibraryParamDuringPre std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation std::set allowedLoadResults = {}; performStressTest( - &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::triggerPredictInALoop, &StressPipelineConfigChanges::changeCustomLibraryParam, performWholeConfigReload, requiredLoadResults, @@ -1926,3 +2174,88 @@ TEST_F(StressModelConfigChanges, AddModelDuringGetModelStatusLoad) { requiredLoadResults, allowedLoadResults); } + +#if (MEDIAPIPE_DISABLE == 0) +class StressMediapipeChanges : public StressPipelineConfigChanges { + const std::string modelName = "dummy"; + const std::string modelInputName = "b"; + const std::string modelOutputName = "a"; + +public: + std::string getServableName() override { + return modelName; + } + void SetUp() override { + SetUpServerInstance(createStressTestPipelineOneDummyConfig()); + } +}; +TEST_F(StressMediapipeChanges, AddGraphDuringPredictLoad) { + // we add another definition during load + SetUpConfig(basicMediapipeConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation + std::set allowedLoadResults = {}; + performStressTest( + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::addNewMediapipeGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeChanges, RemoveGraphDuringPredictLoad) { + // we add another definition during load + SetUpConfig(basicMediapipeConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK, // we expect full continuouity of operation + StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE}; // we expect to stop creating pipelines + std::set allowedLoadResults = {}; + performStressTest( + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::removeMediapipeGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeChanges, RemoveModelDuringPredictLoad) { + // we add another definition during load + SetUpConfig(basicMediapipeConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK, // we expect full continuouity of operation + StatusCode::MEDIAPIPE_EXECUTION_ERROR}; // we expect to stop creating pipelines + std::set allowedLoadResults = {}; + performStressTest( + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::removeMediapipeGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeChanges, ReloadModelDuringPredictLoad) { + // we change nireq during load + SetUpConfig(basicMediapipeConfig); + bool 
performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation + std::set allowedLoadResults = {}; + performStressTest( + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::reloadMediapipeGraphUsedModel, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +TEST_F(StressMediapipeChanges, ReloadMediapipeGraphDuringPredictLoad) { + // we change nireq during load + SetUpConfig(basicMediapipeConfig); + bool performWholeConfigReload = true; + std::set requiredLoadResults = {StatusCode::OK}; // we expect full continuouity of operation + std::set allowedLoadResults = {}; + performStressTest( + &StressPipelineConfigChanges::triggerPredictInALoop, + &StressPipelineConfigChanges::reloadMediapipeGraph, + performWholeConfigReload, + requiredLoadResults, + allowedLoadResults); +} +// TODO status +// TODO metadata +#endif diff --git a/src/test/ensemble_flow_custom_node_tests.cpp b/src/test/ensemble_flow_custom_node_tests.cpp index bfa1e259e6..14a0d5838b 100644 --- a/src/test/ensemble_flow_custom_node_tests.cpp +++ b/src/test/ensemble_flow_custom_node_tests.cpp @@ -189,8 +189,8 @@ class EnsembleFlowCustomNodePipelineExecutionTest : public TestWithTempDir { const std::string customNodeOutputName = "output_numbers"; static constexpr const char* pipelineInputName = "pipeline_input"; const std::string pipelineOutputName = "pipeline_output"; - std::shared_ptr dagDummyModelOutputTensorInfo; - std::shared_ptr dagDummyModelInputTensorInfo; + std::shared_ptr dagDummyModelOutputTensorInfo; + std::shared_ptr dagDummyModelInputTensorInfo; }; TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, AddSubCustomNode) { @@ -1713,7 +1713,7 @@ enum class Method { MAXIMUM_AVERAGE, }; -std::vector prepareGatherHighestExpectedOutput(std::vector input, Method option) { +static std::vector prepareGatherHighestExpectedOutput(std::vector input, Method option) { std::vector expectedOutput(DUMMY_MODEL_OUTPUT_SIZE); size_t tensorsCount = input.size() / DUMMY_MODEL_OUTPUT_SIZE; // perform operations @@ -4875,7 +4875,7 @@ TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerConnectedToNhwc // Execute ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), ovms::StatusCode::OK); - checkIncrement4DimResponse(pipelineOutputName, {3.0, 6.0, 4.0, 7.0, 5.0, 8.0, 4.0, 7.0, 5.0, 8.0, 6.0, 9.0, 5.0, 8.0, 6.0, 9.0, 7.0, 10.0}, request, response, {3, 1, 3, 1, 2}); + checkIncrement4DimResponse(pipelineOutputName, {3.0, 6.0, 4.0, 7.0, 5.0, 8.0, 4.0, 7.0, 5.0, 8.0, 6.0, 9.0, 5.0, 8.0, 6.0, 9.0, 7.0, 10.0}, response, {3, 1, 3, 1, 2}); } struct LibraryProduceImages5DimensionsInFP32OutFP64 { @@ -4981,7 +4981,7 @@ TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerConnectedToNhwc // Execute ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), ovms::StatusCode::OK); - checkIncrement4DimResponse(pipelineOutputName, {3.0, 6.0, 4.0, 7.0, 5.0, 8.0, 4.0, 7.0, 5.0, 8.0, 6.0, 9.0, 5.0, 8.0, 6.0, 9.0, 7.0, 10.0}, request, response, {3, 1, 3, 1, 2}); + checkIncrement4DimResponse(pipelineOutputName, {3.0, 6.0, 4.0, 7.0, 5.0, 8.0, 4.0, 7.0, 5.0, 8.0, 6.0, 9.0, 5.0, 8.0, 6.0, 9.0, 7.0, 10.0}, response, {3, 1, 3, 1, 2}); } TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerCreatesShardedFP64TensorsFromCustomNode) { @@ -5043,7 +5043,7 @@ TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerCreatesShardedF // Execute ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), ovms::StatusCode::OK); - 
checkIncrement4DimResponse(pipelineOutputName, {3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}, request, response, {3, 1, 1, 2, 3}); + checkIncrement4DimResponse(pipelineOutputName, {3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}, response, {3, 1, 1, 2, 3}); } TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerCreatesShardedFP64TensorsFromEntryNode) { @@ -5102,7 +5102,7 @@ TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, DemultiplexerCreatesShardedF // Execute ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), ovms::StatusCode::OK); - checkIncrement4DimResponse(pipelineOutputName, {3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}, request, response, {2, 1, 2, 1, 2}); + checkIncrement4DimResponse(pipelineOutputName, {3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}, response, {2, 1, 2, 1, 2}); } struct LibraryCountDeinitialize { @@ -5259,3 +5259,213 @@ TEST_F(EnsembleFlowCustomNodePipelineExecutionTest, ReloadPipelineWithoutNodeDei // in order to count whether deinitialize has been called expected number of times ASSERT_EQ(LibraryCountDeinitialize::deinitializeCounter, 3); } + +static constexpr const char* INPUT_TENSOR_NAME = "input_string"; +static constexpr const char* OUTPUT_TENSOR_NAME = "output_string"; + +struct Passthrough_AnyDim_U8 { + static int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) { + return 0; + } + static int deinitialize(void* customNodeLibraryInternalManager) { + return 0; + } + static int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int numberOfDimensions = 2; // default + for (int i = 0; i < paramsCount; i++) { + if (std::strcmp(params[i].key, "num_of_dims") == 0) { + numberOfDimensions = std::stoi(params[i].value); + } + } + // // Inputs reading + const CustomNodeTensor* input = nullptr; + + for (int i = 0; i < inputsCount; i++) { + if (std::strcmp(inputs[i].name, INPUT_TENSOR_NAME) == 0) { + input = &(inputs[i]); + } else { + std::cout << "Unrecognized input: " << inputs[i].name << std::endl; + return 1; + } + } + + // Preparing output tensor + float* buffer = (float*)malloc(inputs[0].dataBytes); + std::memcpy((uint8_t*)buffer, inputs[0].data, inputs[0].dataBytes); + + *outputsCount = 1; + *outputs = (struct CustomNodeTensor*)malloc(*outputsCount * sizeof(CustomNodeTensor)); + if ((*outputs) == nullptr) { + std::cout << "malloc has failed" << std::endl; + free(buffer); + return 1; + } + + CustomNodeTensor& output = (*outputs)[0]; + output.name = OUTPUT_TENSOR_NAME; + output.data = reinterpret_cast(buffer); + output.dataBytes = inputs[0].dataBytes; + output.dimsCount = numberOfDimensions; + output.dims = (uint64_t*)malloc(output.dimsCount * sizeof(uint64_t)); + for (int i = 0; i < numberOfDimensions; i++) { + output.dims[i] = input->dims[i]; + } + output.precision = U8; + + return 0; + } + static int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int numberOfDimensions = 2; // default + for (int i = 0; i < paramsCount; i++) { + if (std::strcmp(params[i].key, "num_of_dims") == 0) { + numberOfDimensions = std::stoi(params[i].value); + } + } + + *infoCount = 1; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct 
CustomNodeTensorInfo)); + + (*info)[0].name = INPUT_TENSOR_NAME; + (*info)[0].dimsCount = numberOfDimensions; + (*info)[0].dims = (uint64_t*)malloc((*info)[0].dimsCount * sizeof(uint64_t)); + for (int i = 0; i < numberOfDimensions; i++) { + (*info)[0].dims[i] = -1; + } + (*info)[0].precision = U8; + + return 0; + } + static int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int numberOfDimensions = 2; // default + for (int i = 0; i < paramsCount; i++) { + if (std::strcmp(params[i].key, "num_of_dims") == 0) { + numberOfDimensions = std::stoi(params[i].value); + } + } + + *infoCount = 1; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + + (*info)[0].name = OUTPUT_TENSOR_NAME; + (*info)[0].dimsCount = numberOfDimensions; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + for (int i = 0; i < numberOfDimensions; i++) { + (*info)[0].dims[i] = -1; + } + + (*info)[0].precision = U8; + + return 0; + } + static int release(void* ptr, void* customNodeLibraryInternalManager) { + free(ptr); + return 0; + } +}; + +template +class EnsembleFlowStringInput : public ::testing::Test { +public: + void SetUp() override {} + + RequestType request; + ResponseType response; + std::unique_ptr reporter; + + const std::string customNodeName = "passthrough"; + static constexpr const char* pipelineInputName = "pipeline_input"; + const std::string pipelineOutputName = "pipeline_output"; + const std::string pipelineName = "my_pipeline"; + std::set gatherFromNode = {}; +}; + +using MyTypes = ::testing::Types; +TYPED_TEST_SUITE(EnsembleFlowStringInput, MyTypes); + +TYPED_TEST(EnsembleFlowStringInput, positive_2d) { + // Most basic configuration, just process single passthrough custom node pipeline request + // input passthrough output + // O------->O------->O + std::vector expectedStrings = {"String_123", "zebra", ""}; + prepareInferStringRequest(this->request, this->pipelineInputName, expectedStrings); + + auto inputTensorInfo = std::make_shared(this->pipelineInputName, + ovms::Precision::U8, + ovms::Shape{-1, -1}, + Layout{"NC"}); + const tensor_map_t inputsInfo{{this->pipelineInputName, inputTensorInfo}}; + auto input_node = std::make_unique>(&this->request, inputsInfo); + auto tensorInfo = std::make_shared(this->pipelineOutputName, + ovms::Precision::U8, + ovms::Shape{-1, -1}, + Layout{"NC"}); + const tensor_map_t outputsInfo{{this->pipelineOutputName, tensorInfo}}; + auto output_node = std::make_unique>(&this->response, outputsInfo, this->gatherFromNode, false, this->pipelineName); + auto mockedLibrary = createLibraryMock(); + auto custom_node = std::make_unique(this->customNodeName, mockedLibrary, parameters_t{}); + + Pipeline pipeline(*input_node, *output_node, *this->reporter); + pipeline.connect(*input_node, *custom_node, {{this->pipelineInputName, INPUT_TENSOR_NAME}}); + pipeline.connect(*custom_node, *output_node, {{OUTPUT_TENSOR_NAME, this->pipelineOutputName}}); + + pipeline.push(std::move(input_node)); + pipeline.push(std::move(custom_node)); + pipeline.push(std::move(output_node)); + + ASSERT_EQ(pipeline.execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); + std::vector expectedData = { + 'S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, + 'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::vector expectedShape = {3, 11}; + bool checkRaw = false; + 
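    // Editor's note (hedged): the expected layout above packs the three input strings into a
    // dense 2D U8 tensor, zero-padded to the longest string: "String_123" has 10 characters,
    // each row is 10 + 1 = 11 bytes (one trailing zero byte), so the shape is {3, 11};
    // "zebra" and the empty string are padded with zeros to the same width. A quick sanity
    // check one could add at this point (an assumption, not part of the original patch):
    //   ASSERT_EQ(expectedData.size(), expectedShape[0] * expectedShape[1]);  // 3 * 11 == 33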
checkIncrement4DimResponse(this->pipelineOutputName, expectedData, this->response, expectedShape, checkRaw); +} + +TYPED_TEST(EnsembleFlowStringInput, positive_1d) { + // Most basic configuration, just process single passthrough custom node pipeline request + // input passthrough output + // O------->O------->O + std::vector expectedStrings = {"ala", "", "ma", "kota"}; + prepareInferStringRequest(this->request, this->pipelineInputName, expectedStrings); + + auto inputTensorInfo = std::make_shared(this->pipelineInputName, + ovms::Precision::U8, + ovms::Shape{-1}, + Layout{"NC"}); + const tensor_map_t inputsInfo{{this->pipelineInputName, inputTensorInfo}}; + auto input_node = std::make_unique>(&this->request, inputsInfo); + auto tensorInfo = std::make_shared(this->pipelineOutputName, + ovms::Precision::U8, + ovms::Shape{-1}, + Layout{"NC"}); + const tensor_map_t outputsInfo{{this->pipelineOutputName, tensorInfo}}; + auto output_node = std::make_unique>(&this->response, outputsInfo, this->gatherFromNode, false, this->pipelineName); + auto mockedLibrary = createLibraryMock(); + auto custom_node = std::make_unique(this->customNodeName, mockedLibrary, parameters_t{{"num_of_dims", "1"}}); + + Pipeline pipeline(*input_node, *output_node, *this->reporter); + pipeline.connect(*input_node, *custom_node, {{this->pipelineInputName, INPUT_TENSOR_NAME}}); + pipeline.connect(*custom_node, *output_node, {{OUTPUT_TENSOR_NAME, this->pipelineOutputName}}); + + pipeline.push(std::move(input_node)); + pipeline.push(std::move(custom_node)); + pipeline.push(std::move(output_node)); + + ASSERT_EQ(pipeline.execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); + std::vector expectedData = { + 4, 0, 0, 0, // batch size + 0, 0, 0, 0, // first string start offset + 3, 0, 0, 0, // end of "ala" in condensed content + 3, 0, 0, 0, // end of "" in condensed content + 5, 0, 0, 0, // end of "ma" in condensed content + 9, 0, 0, 0, // end of "kota" in condensed content + 'a', 'l', 'a', + 'm', 'a', + 'k', 'o', 't', 'a'}; + std::vector expectedShape = {33}; + bool checkRaw = false; + checkIncrement4DimResponse(this->pipelineOutputName, expectedData, this->response, expectedShape, checkRaw); +} diff --git a/src/test/ensemble_tests.cpp b/src/test/ensemble_tests.cpp index 852c5fe9c0..3ce54f7756 100644 --- a/src/test/ensemble_tests.cpp +++ b/src/test/ensemble_tests.cpp @@ -21,7 +21,6 @@ #include #include -#include "../binaryutils.hpp" #include "../dags/dl_node.hpp" #include "../dags/entry_node.hpp" #include "../dags/exit_node.hpp" @@ -38,6 +37,7 @@ #include "../modelinstance.hpp" #include "../prediction_service_utils.hpp" #include "../status.hpp" +#include "../tensor_conversion.hpp" #include "../timer.hpp" #include "test_utils.hpp" @@ -78,12 +78,12 @@ class EnsembleFlowBothApiTest : public TestWithTempDir { Layout{"NC"}); } - void prepareRequest(const std::vector& requestData, TFSRequestType& request, const std::string& customPipelineInputName, const signed_shape_t& shape = {1, DUMMY_MODEL_INPUT_SIZE}) { + void prepareRequest(const std::vector& requestData, TFSRequestType& request, const std::string& customPipelineInputName, const ovms::signed_shape_t& shape = {1, DUMMY_MODEL_INPUT_SIZE}) { request.Clear(); preparePredictRequest(request, inputs_info_t{{customPipelineInputName, {shape, ovms::Precision::FP32}}}, requestData); } - void prepareRequest(const std::vector& requestData, KFSRequest& request, const std::string& customPipelineInputName, const signed_shape_t& shape = {1, DUMMY_MODEL_INPUT_SIZE}) { + void prepareRequest(const 
std::vector& requestData, KFSRequest& request, const std::string& customPipelineInputName, const ovms::signed_shape_t& shape = {1, DUMMY_MODEL_INPUT_SIZE}) { request.Clear(); prepareKFSInferInputTensor(request, customPipelineInputName, std::make_tuple(shape, ovmsPrecisionToKFSPrecision(ovms::Precision::FP32)), requestData); } @@ -103,11 +103,11 @@ class EnsembleFlowBothApiTest : public TestWithTempDir { std::optional requestedModelVersion{std::nullopt}; const std::string customPipelineInputName = "custom_dummy_input"; const std::string customPipelineOutputName = "custom_dummy_output"; - std::shared_ptr dagDummyModelOutputTensorInfo = std::make_shared(customPipelineOutputName, + std::shared_ptr dagDummyModelOutputTensorInfo = std::make_shared(customPipelineOutputName, ovms::Precision::FP32, DUMMY_MODEL_SHAPE_META, Layout{"NC"}); - std::shared_ptr dagDummyModelInputTensorInfo = std::make_shared(customPipelineInputName, + std::shared_ptr dagDummyModelInputTensorInfo = std::make_shared(customPipelineInputName, ovms::Precision::FP32, DUMMY_MODEL_SHAPE_META, Layout{"NC"}); @@ -223,11 +223,11 @@ class EnsembleFlowTest : public TestWithTempDir { std::optional requestedModelVersion{std::nullopt}; const std::string customPipelineInputName = "custom_dummy_input"; const std::string customPipelineOutputName = "custom_dummy_output"; - std::shared_ptr dagDummyModelOutputTensorInfo = std::make_shared(customPipelineOutputName, + std::shared_ptr dagDummyModelOutputTensorInfo = std::make_shared(customPipelineOutputName, ovms::Precision::FP32, DUMMY_MODEL_SHAPE_META, Layout{"NC"}); - std::shared_ptr dagDummyModelInputTensorInfo = std::make_shared(customPipelineInputName, + std::shared_ptr dagDummyModelInputTensorInfo = std::make_shared(customPipelineInputName, ovms::Precision::FP32, DUMMY_MODEL_SHAPE_META, Layout{"NC"}); @@ -311,7 +311,7 @@ TYPED_TEST(EnsembleFlowBothApiTest, TwoInnerNodesConnectedShapeRangePartiallyMat pipeline.push(std::move(output_node)); ASSERT_EQ(pipeline.execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse(this->customPipelineOutputName, std::vector{7.0, 8.0, 17.0, 18.0}, this->request, this->response, {2, 2}); + checkIncrement4DimResponse(this->customPipelineOutputName, std::vector{7.0, 8.0, 17.0, 18.0}, this->response, {2, 2}); } // 2x4 not passing due to not matched dummy_A (but matching dummy_B) @@ -488,6 +488,11 @@ TEST_F(EnsembleFlowValidationTest, DummyModelProtoValidationErrorBinaryInputWron proto1.mutable_tensor_shape()->add_dim()->set_size(1); proto1.mutable_tensor_shape()->add_dim()->set_size(1); + // enforce the endpoint to be 4d to not fall into string handling + this->dagDummyModelInputTensorInfo = std::make_shared(this->customPipelineInputName, + ovms::Precision::FP32, + ovms::Shape{1, 224, 224, 3}, + ovms::Layout{"NHWC"}); auto pipeline = createDummyPipeline(managerWithDummyModel); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::INVALID_NO_OF_SHAPE_DIMENSIONS); } @@ -501,6 +506,11 @@ TEST_F(EnsembleFlowValidationTest, DummyModelProtoValidationErrorBinaryInputBatc proto1.set_dtype(tensorflow::DataType::DT_STRING); proto1.mutable_tensor_shape()->add_dim()->set_size(2); + // enforce the endpoint to be 4d to not fall into string handling + this->dagDummyModelInputTensorInfo = std::make_shared(this->customPipelineInputName, + ovms::Precision::FP32, + ovms::Shape{1, 224, 224, 3}, + ovms::Layout{"NHWC"}); auto pipeline = createDummyPipeline(managerWithDummyModel); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), 
StatusCode::INVALID_BATCH_SIZE); } @@ -677,7 +687,7 @@ TEST_F(EnsembleFlowTest, DummyModelDirectAndPipelineInference) { tensorflow::serving::PredictRequest simpleModelRequest; preparePredictRequest(simpleModelRequest, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); std::vector requestData{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; auto& input = (*simpleModelRequest.mutable_inputs())[DUMMY_MODEL_INPUT_NAME]; input.mutable_tensor_content()->assign((char*)requestData.data(), requestData.size() * sizeof(float)); @@ -4562,7 +4572,7 @@ TEST_F(EnsembleFlowTest, ExecuteSingleIncrement4DimInputNHWC) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {2.0, 5.0, 3.0, 6.0, 4.0, 7.0}, request, response, {1, 3, 1, 2}); + checkIncrement4DimResponse("pipeline_output", {2.0, 5.0, 3.0, 6.0, 4.0, 7.0}, response, {1, 3, 1, 2}); } static const char* pipelineSingleIncrement4DimInputNHWCDynamicBatch = R"( @@ -4622,7 +4632,7 @@ TYPED_TEST(EnsembleFlowBothApiTest, ExecuteSingleIncrement4DimInputNHWCDynamicBa ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &this->request, &this->response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse(std::string{"pipeline_output"}, {2.0, 5.0, 3.0, 6.0, 4.0, 7.0, 11.0, 41.0, 21.0, 51.0, 31.0, 61.0}, this->request, this->response, {2, 1, 3, 1, 2}); + checkIncrement4DimResponse(std::string{"pipeline_output"}, {2.0, 5.0, 3.0, 6.0, 4.0, 7.0, 11.0, 41.0, 21.0, 51.0, 31.0, 61.0}, this->response, {2, 1, 3, 1, 2}); } static const char* pipelineSingleIncrement4DimOutputNHWC = R"( @@ -4680,7 +4690,7 @@ TEST_F(EnsembleFlowTest, ExecuteSingleIncrement4DimOutputNHWC) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {2.0, 4.0, 6.0, 3.0, 5.0, 7.0}, request, response, {1, 1, 2, 3}); + checkIncrement4DimResponse("pipeline_output", {2.0, 4.0, 6.0, 3.0, 5.0, 7.0}, response, {1, 1, 2, 3}); } static const char* pipelineSingleIncrement4DimOutputNHWCDynamicBatch = R"( @@ -4741,7 +4751,7 @@ TYPED_TEST(EnsembleFlowBothApiTest, ExecuteSingleIncrement4DimOutputNHWCDynamicB ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &this->request, &this->response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {2.0, 4.0, 6.0, 3.0, 5.0, 7.0, 11.0, 31.0, 51, 21.0, 41.0, 61.0}, this->request, this->response, {2, 1, 1, 2, 3}); + checkIncrement4DimResponse("pipeline_output", {2.0, 4.0, 6.0, 3.0, 5.0, 7.0, 11.0, 31.0, 51, 21.0, 41.0, 61.0}, this->response, {2, 1, 1, 2, 3}); } static const char* pipelineAmbiguousInputMeta = R"( @@ -4899,7 +4909,7 @@ TEST_F(EnsembleFlowTest, ExecutePipelineWithInnerNhwcConnection) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, request, response, {1, 3, 1, 2}); + 
checkIncrement4DimResponse("pipeline_output", {3.0, 4.0, 5.0, 6.0, 7.0, 8.0}, response, {1, 3, 1, 2}); } class EnsembleFlowTestBinaryInput : public EnsembleFlowTest { @@ -4965,7 +4975,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BatchSize1) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0}, request, response, {1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0}, response, {1, 3, 1, 1}); } static const char* pipelineWith4DimDummyFP64 = R"( @@ -5022,7 +5032,7 @@ TEST_F(EnsembleFlowTestBinaryInput, DoublePrecision) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0}, request, response, {1, 1, 1, 3}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0}, response, {1, 1, 1, 3}); } static const char* pipelineSingleIncrement4DimOutputNHWC1x1BatchAny = R"( @@ -5195,7 +5205,7 @@ TEST_F(EnsembleFlowTestBinaryInput, GrayscaleImage) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {1.0}, request, response, {1, 1, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {1.0}, response, {1, 1, 1, 1}); } static const char* pipelineSingleIncrement4DimOutputNHWC1x1BS5 = R"( @@ -5254,7 +5264,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BatchSize5) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, request, response, {5, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, response, {5, 3, 1, 1}); } static const char* pipelineSingleIncrement4DimOutputNHWC2x2 = R"( @@ -5312,7 +5322,7 @@ TEST_F(EnsembleFlowTestBinaryInput, ResizeBatch1) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0}, request, response, {1, 3, 2, 2}); + checkIncrement4DimResponse("pipeline_output", {37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0}, response, {1, 3, 2, 2}); } static const char* pipelineSingleIncrement4DimOutputNHWC2x2BS5 = R"( @@ -5371,8 +5381,7 @@ TEST_F(EnsembleFlowTestBinaryInput, ResizeBatch5) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 
37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0}, - request, response, {5, 3, 2, 2}); + checkIncrement4DimResponse("pipeline_output", {37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0, 37.0, 37.0, 37.0, 37.0, 28.0, 28.0, 28.0, 28.0, 238.0, 238.0, 238.0, 238.0}, response, {5, 3, 2, 2}); } static const char* pipelineSingleIncrement4DimOutputNHWC1Channel = R"( @@ -5507,7 +5516,7 @@ TEST_F(EnsembleFlowTestBinaryInput, EntryDemultiplexer) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, request, response, {5, 1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, response, {5, 1, 3, 1, 1}); } static const char* pipelineSingleIncrement4DimOutputNHWCRangeResolutionEntryStaticDemultiplexer = R"( @@ -5567,7 +5576,7 @@ TEST_F(EnsembleFlowTestBinaryInput, EntryStaticDemultiplexerResolutionMatches) { ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, request, response, {5, 1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, response, {5, 1, 3, 1, 1}); } TEST_F(EnsembleFlowTestBinaryInput, EntryStaticDemultiplexerResolutionAutoAlign) { @@ -5643,7 +5652,7 @@ TEST_F(EnsembleFlowTestBinaryInput, EntryDynamicDemultiplexerResolutionMatches) ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "increment_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, request, response, {5, 1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0, 37.0, 28.0, 238.0}, response, {5, 1, 3, 1, 1}); } TEST_F(EnsembleFlowTestBinaryInput, EntryDynamicDemultiplexerResolutionResolutionMismatch) { @@ -5714,7 +5723,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANY_Reques ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "my_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0}, request, response, {1, 1, 1, 3}); + checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0}, response, {1, 1, 1, 3}); } TEST_F(EnsembleFlowTestBinaryInput, 
BinaryInputWithPipelineInputLayoutANY_RequestBS2) { @@ -5728,7 +5737,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANY_Reques ASSERT_EQ(manager.loadConfig(fileToReload), StatusCode::OK); ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "my_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0, 44.0, 35.0, 245.0}, request, response, {2, 1, 1, 3}); + checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0, 44.0, 35.0, 245.0}, response, {2, 1, 1, 3}); } TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANY_RequestMisaligned) { @@ -5806,7 +5815,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYAndDemu ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "my_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0}, request, response, {1, 1, 1, 1, 3}); + checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0}, response, {1, 1, 1, 1, 3}); } TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYAndDemultiplexer_RequestBS2) { @@ -5820,7 +5829,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYAndDemu ASSERT_EQ(manager.loadConfig(fileToReload), StatusCode::OK); ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "my_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0, 44.0, 35.0, 245.0}, request, response, {2, 1, 1, 1, 3}); + checkIncrement4DimResponse("pipeline_output", {44.0, 35.0, 245.0, 44.0, 35.0, 245.0}, response, {2, 1, 1, 1, 3}); } TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYAndDemultiplexer_RequestMisaligned) { @@ -5923,7 +5932,7 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYCustomN ASSERT_EQ(manager.getPipelineFactory().create(pipeline, "my_pipeline", &request, &response, manager), StatusCode::OK); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {45.0, 36.0, 246.0}, request, response, {1, 1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {45.0, 36.0, 246.0}, response, {1, 1, 3, 1, 1}); } static const char* pipelineWithDynamicCustomNodeDemultiplexerAndRangeOfResolutionModel = R"( @@ -6012,5 +6021,5 @@ TEST_F(EnsembleFlowTestBinaryInput, BinaryInputWithPipelineInputLayoutANYCustomN prepareBinaryRequest(imagePath, request, "pipeline_input", batchSize); ASSERT_EQ(pipeline->execute(DEFAULT_TEST_CONTEXT), StatusCode::OK); - checkIncrement4DimResponse("pipeline_output", {45.0, 36.0, 246.0}, request, response, {1, 1, 3, 1, 1}); + checkIncrement4DimResponse("pipeline_output", {45.0, 36.0, 246.0}, response, {1, 1, 3, 1, 1}); } diff --git a/src/test/get_model_metadata_validation_test.cpp b/src/test/get_model_metadata_validation_test.cpp index ead4575163..1a4512d701 100644 --- a/src/test/get_model_metadata_validation_test.cpp +++ b/src/test/get_model_metadata_validation_test.cpp @@ -41,12 +41,17 @@ TEST_F(GetModelMetadataValidation, ValidRequestWithVersionSpecified) { EXPECT_TRUE(status.ok()); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-result" + 
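// Editor's note (hedged): the pragma block above is needed because release_model_spec()
// in the test below hands ownership of the ModelSpec message to the caller, and newer
// protobuf releases may annotate release_* accessors so that discarding the returned
// pointer triggers -Wunused-result. A warning-free alternative (an assumption, not part
// of this patch) would be to take ownership explicitly:
//   std::unique_ptr<tensorflow::serving::ModelSpec> released{request.release_model_spec()};
// which still leaves the request without a model_spec while avoiding both the warning and a leak.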
TEST_F(GetModelMetadataValidation, RequestMissingModelSpec) { request.release_model_spec(); auto status = ovms::GetModelMetadataImpl::validate(&request); EXPECT_EQ(status, ovms::StatusCode::MODEL_SPEC_MISSING); } +#pragma GCC diagnostic pop + TEST_F(GetModelMetadataValidation, RequestMissingMetadataField) { request.mutable_metadata_field()->RemoveLast(); auto status = ovms::GetModelMetadataImpl::validate(&request); diff --git a/src/test/http_rest_api_handler_test.cpp b/src/test/http_rest_api_handler_test.cpp index ebd3458b45..84ba1d7092 100644 --- a/src/test/http_rest_api_handler_test.cpp +++ b/src/test/http_rest_api_handler_test.cpp @@ -17,6 +17,7 @@ #include "../config.hpp" #include "../http_rest_api_handler.hpp" +#include "../localfilesystem.hpp" #include "../logging.hpp" #include "../modelmanager.hpp" #include "../servablemanagermodule.hpp" @@ -624,7 +625,83 @@ TEST_F(ConfigReload, StartWith1DummyThenReloadToAddPipeline) { EXPECT_EQ(expectedJson, response); EXPECT_EQ(status, ovms::StatusCode::OK_RELOADED); } +#if (MEDIAPIPE_DISABLE == 0) +TEST_F(ConfigReload, StartWith1DummyThenReloadToMediapipe) { + ovms::Server& ovmsServer = ovms::Server::instance(); + TestHelper1 t(*this, configWith1Dummy); + auto handler = ovms::HttpRestApiHandler(ovmsServer, 10); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + RemoveConfig(); + + std::string contents; + auto fs = std::make_shared(); + fs->readTextFile("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json", &contents); + SetUpConfig(contents); + std::this_thread::sleep_for(std::chrono::seconds(1)); + + const char* expectedJson = R"({ +"add" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"dummy" : +{ + "model_version_status": [ + { + "version": "1", + "state": "END", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAdd" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAddADAPTFULL" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +} +})"; + std::string response; + auto status = handler.processConfigReloadRequest(response, t.getManager()); + + EXPECT_EQ(expectedJson, response); + EXPECT_EQ(status, ovms::StatusCode::OK_RELOADED); +} +#endif static const char* configWithPipelineWithInvalidOutputs = R"( { "model_config_list": [ @@ -998,3 +1075,183 @@ TEST_F(ConfigStatus, configWithPipelines) { EXPECT_EQ(expectedJson, response); EXPECT_EQ(status, ovms::StatusCode::OK); } +#if (MEDIAPIPE_DISABLE == 0) +TEST_F(ConfigStatus, configWithMediapipe) { + ovms::Server& ovmsServer = ovms::Server::instance(); + + std::string contents; + auto fs = std::make_shared(); + fs->readTextFile("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json", &contents); + + TestHelper1 t(*this, contents.c_str()); + auto handler = ovms::HttpRestApiHandler(ovmsServer, 10); + std::string response; + + const char* expectedJson = R"({ +"add" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAdd" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAddADAPTFULL" : +{ + 
"model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +} +})"; + auto status = handler.processConfigStatusRequest(response, t.getManager()); + EXPECT_EQ(expectedJson, response); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TEST_F(ConfigStatus, configWithMediapipeRemoved) { + ovms::Server& ovmsServer = ovms::Server::instance(); + + std::string contents; + auto fs = std::make_shared(); + fs->readTextFile("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json", &contents); + + TestHelper1 t(*this, contents.c_str()); + auto handler = ovms::HttpRestApiHandler(ovmsServer, 10); + std::string response; + + const char* expectedJson = R"({ +"add" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAdd" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAddADAPTFULL" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +} +})"; + auto status = handler.processConfigStatusRequest(response, t.getManager()); + EXPECT_EQ(expectedJson, response); + EXPECT_EQ(status, ovms::StatusCode::OK); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + RemoveConfig(); + + SetUpConfig(configWith1Dummy); + std::this_thread::sleep_for(std::chrono::seconds(1)); + + const char* expectedJsonRemoved = R"({ +"add" : +{ + "model_version_status": [ + { + "version": "1", + "state": "END", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"dummy" : +{ + "model_version_status": [ + { + "version": "1", + "state": "AVAILABLE", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAdd" : +{ + "model_version_status": [ + { + "version": "1", + "state": "END", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +}, +"mediapipeAddADAPTFULL" : +{ + "model_version_status": [ + { + "version": "1", + "state": "END", + "status": { + "error_code": "OK", + "error_message": "OK" + } + } + ] +} +})"; + + status = handler.processConfigReloadRequest(response, t.getManager()); + EXPECT_EQ(expectedJsonRemoved, response); + EXPECT_EQ(status, ovms::StatusCode::OK_RELOADED); +} +#endif diff --git a/src/test/inferencerequest_test.cpp b/src/test/inferencerequest_test.cpp index 435d82be50..ca5c842ba1 100644 --- a/src/test/inferencerequest_test.cpp +++ b/src/test/inferencerequest_test.cpp @@ -49,7 +49,7 @@ const uint32_t PRIORITY{7}; const uint64_t REQUEST_ID{3}; const std::string INPUT_NAME{"NOT_RANDOM_NAME"}; -const ovms::shape_t INPUT_SHAPE{1, 3, 220, 230}; +const ovms::signed_shape_t INPUT_SHAPE{1, 3, 220, 230}; const std::array INPUT_DATA{1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; constexpr size_t INPUT_DATA_BYTESIZE{INPUT_DATA.size() * sizeof(float)}; const OVMS_DataType DATATYPE{OVMS_DATATYPE_FP32}; @@ -97,7 +97,8 @@ TEST(InferenceRequest, CreateInferenceRequest) { ASSERT_EQ(status, StatusCode::OK) << status.string(); ASSERT_NE(nullptr, tensor); EXPECT_EQ(tensor->getDataType(), DATATYPE); - EXPECT_TRUE(Shape(tensor->getShape()).match(INPUT_SHAPE)); + ASSERT_EQ(tensor->getShape().size(), INPUT_SHAPE.size()); + EXPECT_EQ(std::memcmp(tensor->getShape().data(), INPUT_SHAPE.data(), INPUT_SHAPE.size() * sizeof(int64_t)), 0); const Buffer* buffer = 
tensor->getBuffer(); ASSERT_NE(nullptr, buffer); ASSERT_NE(nullptr, buffer->data()); diff --git a/src/test/kfs_rest_parser_test.cpp b/src/test/kfs_rest_parser_test.cpp index 49e5837fe7..ad788470fe 100644 --- a/src/test/kfs_rest_parser_test.cpp +++ b/src/test/kfs_rest_parser_test.cpp @@ -297,6 +297,52 @@ TEST_F(KFSRestParserTest, parseValidRequestBOOL) { ASSERT_THAT(proto.inputs()[0].contents().bool_contents(), ElementsAre(true, true, false, false)); } +TEST_F(KFSRestParserTest, parseValidRequestStringInput) { + std::string request = R"({ + "inputs" : [ + { + "name" : "input0", + "shape" : [ 3 ], + "datatype" : "BYTES", + "data" : [ "zebra", "openvino", "123"] + } + ] + })"; + auto status = parser.parse(request.c_str()); + ASSERT_EQ(status, StatusCode::OK); + + auto proto = parser.getProto(); + ASSERT_EQ(proto.inputs_size(), 1); + ASSERT_EQ(proto.inputs()[0].name(), "input0"); + ASSERT_THAT(proto.inputs()[0].shape(), ElementsAre(3)); + ASSERT_EQ(proto.inputs()[0].datatype(), "BYTES"); + ASSERT_EQ(proto.inputs()[0].contents().bytes_contents_size(), 3); + ASSERT_THAT(proto.inputs()[0].contents().bytes_contents(), ElementsAre("zebra", "openvino", "123")); +} + +TEST_F(KFSRestParserTest, parseValidRequestStringInputNested) { + std::string request = R"({ + "inputs" : [ + { + "name" : "input0", + "shape" : [ 3 ], + "datatype" : "BYTES", + "data" : [ ["zebra"], ["openvino", "123"]] + } + ] + })"; + auto status = parser.parse(request.c_str()); + ASSERT_EQ(status, StatusCode::OK); + + auto proto = parser.getProto(); + ASSERT_EQ(proto.inputs_size(), 1); + ASSERT_EQ(proto.inputs()[0].name(), "input0"); + ASSERT_THAT(proto.inputs()[0].shape(), ElementsAre(3)); + ASSERT_EQ(proto.inputs()[0].datatype(), "BYTES"); + ASSERT_EQ(proto.inputs()[0].contents().bytes_contents_size(), 3); + ASSERT_THAT(proto.inputs()[0].contents().bytes_contents(), ElementsAre("zebra", "openvino", "123")); +} + TEST_F(KFSRestParserTest, parseValidRequestBYTES) { std::string request = R"({ "inputs" : [ @@ -864,6 +910,36 @@ TEST_F(KFSRestParserTest, parseInvalidRequestWithInputsMissing) { ASSERT_EQ(status, StatusCode::REST_NO_INPUTS_FOUND); } +TEST_F(KFSRestParserTest, parseInvalidRequestStringInput_datatypeNotU8) { + std::string request = R"({ + "inputs" : [ + { + "name" : "input0", + "shape" : [ 3 ], + "datatype" : "UINT16", + "data" : [ "zebra", "openvino", "123"] + } + ] + })"; + auto status = parser.parse(request.c_str()); + ASSERT_EQ(status, StatusCode::REST_COULD_NOT_PARSE_INPUT); +} + +TEST_F(KFSRestParserTest, parseInvalidRequestStringInput) { + std::string request = R"({ + "inputs" : [ + { + "name" : "input0", + "shape" : [ 3 ], + "datatype" : "BYTES", + "data" : [ "zebra", "openvino", 123] + } + ] + })"; + auto status = parser.parse(request.c_str()); + ASSERT_EQ(status, StatusCode::REST_COULD_NOT_PARSE_INPUT); +} + TEST_F(KFSRestParserTest, parseInvalidDataNotHeterogenous) { std::string request = R"({ "inputs" : [ diff --git a/src/test/kfs_rest_test.cpp b/src/test/kfs_rest_test.cpp index 907574948f..56f2609ceb 100644 --- a/src/test/kfs_rest_test.cpp +++ b/src/test/kfs_rest_test.cpp @@ -94,6 +94,20 @@ class HttpRestApiHandlerTest : public ::testing::Test { std::unique_ptr HttpRestApiHandlerTest::server = nullptr; std::unique_ptr HttpRestApiHandlerTest::thread = nullptr; +TEST_F(HttpRestApiHandlerTest, GetModelMetadataWithLongVersion) { + std::string request = "/v1/models/dummy/versions/72487667423532349025128558057"; + ovms::HttpRequestComponents comp; + + ASSERT_EQ(handler->parseRequestComponents(comp, "GET", request), 
StatusCode::MODEL_VERSION_MISSING); +} + +TEST_F(HttpRestApiHandlerTest, GetModelMetadataWithEscapedPath) { + std::string request = "/v1/models/..iO!.0?E*/versions/1/metadata"; + ovms::HttpRequestComponents comp; + + ASSERT_EQ(handler->parseRequestComponents(comp, "GET", request), StatusCode::OK); +} + TEST_F(HttpRestApiHandlerTest, RegexParseReadyWithImplicitVersion) { std::string request = "/v2/models/dummy/ready"; ovms::HttpRequestComponents comp; @@ -349,7 +363,9 @@ TEST_F(HttpRestApiHandlerTest, binaryInputsINT8) { ASSERT_EQ(grpc_request.inputs_size(), 1); ASSERT_EQ(grpc_request.model_name(), modelName); ASSERT_EQ(grpc_request.model_version(), std::to_string(modelVersion.value())); - auto params = grpc_request.parameters(); + auto params = grpc_request.inputs()[0].parameters(); + ASSERT_EQ(params.count("binary_data_size"), 1); + ASSERT_EQ(params["binary_data_size"].int64_param(), 4); ASSERT_EQ(grpc_request.inputs()[0].name(), "b"); ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT8"); @@ -357,7 +373,50 @@ TEST_F(HttpRestApiHandlerTest, binaryInputsINT8) { ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); int i = 0; - for (auto content : grpc_request.inputs()[0].contents().int_contents()) { + for (auto content : grpc_request.raw_input_contents()[0]) { + ASSERT_EQ(content, i++); + } + ASSERT_EQ(i, 4); +} + +TEST_F(HttpRestApiHandlerTest, binaryInputsINT8_twoInputs) { + std::string binaryData{0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1,4],\"datatype\":\"INT8\",\"parameters\":{\"binary_data_size\":4}}, {\"name\":\"c\",\"shape\":[1,4],\"datatype\":\"INT8\",\"parameters\":{\"binary_data_size\":4}}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); + + ASSERT_EQ(grpc_request.inputs_size(), 2); + ASSERT_EQ(grpc_request.raw_input_contents_size(), 2); + ASSERT_EQ(grpc_request.model_name(), modelName); + ASSERT_EQ(grpc_request.model_version(), std::to_string(modelVersion.value())); + auto params1 = grpc_request.inputs()[0].parameters(); + ASSERT_EQ(params1.count("binary_data_size"), 1); + ASSERT_EQ(params1["binary_data_size"].int64_param(), 4); + ASSERT_EQ(grpc_request.inputs()[0].name(), "b"); + ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT8"); + + ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); + ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); + + int i = 0; + for (auto content : grpc_request.raw_input_contents()[0]) { + ASSERT_EQ(content, i++); + } + ASSERT_EQ(i, 4); + auto params2 = grpc_request.inputs()[1].parameters(); + ASSERT_EQ(params2.count("binary_data_size"), 1); + ASSERT_EQ(params2["binary_data_size"].int64_param(), 4); + ASSERT_EQ(grpc_request.inputs()[1].name(), "c"); + ASSERT_EQ(grpc_request.inputs()[1].datatype(), "INT8"); + + ASSERT_EQ(grpc_request.inputs()[1].shape()[0], 1); + ASSERT_EQ(grpc_request.inputs()[1].shape()[1], 4); + + i = 0; + for (auto content : grpc_request.raw_input_contents()[1]) { ASSERT_EQ(content, i++); } ASSERT_EQ(i, 4); @@ -367,6 +426,7 @@ static void assertSingleBinaryInput(const std::string& modelName, const std::opt ASSERT_EQ(grpc_request.inputs_size(), 1); ASSERT_EQ(grpc_request.model_name(), modelName); ASSERT_EQ(grpc_request.model_version(), std::to_string(modelVersion.value())); + 
ASSERT_EQ(grpc_request.raw_input_contents().size(), 1); ASSERT_EQ(grpc_request.inputs()[0].name(), "b"); } @@ -376,24 +436,41 @@ static void assertBinaryInputsBYTES(const std::string& modelName, const std::opt ASSERT_EQ(grpc_request.inputs()[0].datatype(), "BYTES"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); - ASSERT_EQ(grpc_request.inputs()[0].contents().bytes_contents()[0], binaryData); - ASSERT_EQ(grpc_request.inputs()[0].contents().bytes_contents_size(), 1); + uint32_t dataSize = *((int32_t*)(binaryData.data())); + ASSERT_EQ(dataSize, binaryData.size() - sizeof(uint32_t)); + ASSERT_EQ(grpc_request.raw_input_contents()[0].size(), binaryData.size()); + ASSERT_EQ(grpc_request.raw_input_contents()[0], binaryData); } TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES) { - std::string binaryData{0x00, 0x01, 0x02, 0x03}; - std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\",\"parameters\":{\"binary_data_size\":4}}]}"; + std::string binaryData{0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\",\"parameters\":{\"binary_data_size\":8}}]}"; request_body += binaryData; ::KFSRequest grpc_request; int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); - ASSERT_EQ(grpc_request.inputs()[0].parameters().count("binary_data_size"), 1); assertBinaryInputsBYTES(modelName, modelVersion, grpc_request, binaryData); } +TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_Batch2) { + std::string binaryData{0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x02}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[2],\"datatype\":\"BYTES\",\"parameters\":{\"binary_data_size\":20}}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); + assertSingleBinaryInput(modelName, modelVersion, grpc_request); + + ASSERT_EQ(grpc_request.inputs()[0].datatype(), "BYTES"); + ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 2); + ASSERT_EQ(grpc_request.raw_input_contents()[0].size(), binaryData.size()); + ASSERT_EQ(grpc_request.raw_input_contents()[0], binaryData); +} + TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_noBinaryDataSizeParameter) { - std::string binaryData{0x00, 0x01, 0x02, 0x03}; + std::string binaryData{0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03}; std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\"}]}"; request_body += binaryData; @@ -403,15 +480,59 @@ TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_noBinaryDataSizeParameter) { assertBinaryInputsBYTES(modelName, modelVersion, grpc_request, binaryData); } +TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_noBinaryDataSizeParameter_twoInputs) { + // This scenario will fail because binary_data_size parameter is required for BYTES datatype when there are more than 1 inputs in request + std::string binaryData{0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03}; + std::string request_body = 
"{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\"}, {\"name\":\"c\",\"shape\":[1],\"datatype\":\"BYTES\"}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::REST_COULD_NOT_PARSE_INPUT); +} + +TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_dataInInvalidFormat) { + std::string binaryData{0x11, 0x11, 0x11, 0x11, 0x00, 0x01, 0x02, 0x03}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\"}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); + // Data correctness will be checked at the stage of grpc input deserialization +} + +TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_sizeInBytesBiggerThanBuffer) { + std::string binaryData{0x16, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\",\"parameters\":{\"binary_data_size\":8}}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); + // Data correctness will be checked at the stage of grpc input deserialization +} + +TEST_F(HttpRestApiHandlerTest, binaryInputsBYTES_BinaryDataSizeBiggerThanActualBuffer) { + std::string binaryData{0x16, 0x00}; + std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\",\"parameters\":{\"binary_data_size\":8}}]}"; + request_body += binaryData; + + ::KFSRequest grpc_request; + int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); + ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::REST_BINARY_BUFFER_EXCEEDED); + // Data correctness will be checked at the stage of grpc input deserialization +} + static void assertBinaryInputsINT16(const std::string& modelName, const std::optional& modelVersion, ::KFSRequest& grpc_request, std::string binaryData) { assertSingleBinaryInput(modelName, modelVersion, grpc_request); ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT16"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().int_contents()) { - ASSERT_EQ(content, i++); + size_t i; + for (i = 0; i < (grpc_request.raw_input_contents()[0].size() / sizeof(int16_t)); i++) { + ASSERT_EQ(((int16_t*)grpc_request.raw_input_contents()[0].data())[i], i); } ASSERT_EQ(i, 4); } @@ -444,9 +565,9 @@ static void assertBinaryInputsINT32(const std::string& modelName, const std::opt ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT32"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().int_contents()) { - ASSERT_EQ(content, i++); + size_t i; + for (i = 0; i < 
(grpc_request.raw_input_contents()[0].size() / sizeof(int32_t)); i++) { + ASSERT_EQ(((int32_t*)grpc_request.raw_input_contents()[0].data())[i], i); } ASSERT_EQ(i, 4); } @@ -479,9 +600,9 @@ static void assertBinaryInputsINT64(const std::string& modelName, const std::opt ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT64"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().int64_contents()) { - ASSERT_EQ(content, i++); + size_t i; + for (i = 0; i < (grpc_request.raw_input_contents()[0].size() / sizeof(int64_t)); i++) { + ASSERT_EQ(((int64_t*)grpc_request.raw_input_contents()[0].data())[i], i); } ASSERT_EQ(i, 4); } @@ -514,9 +635,9 @@ static void assertBinaryInputsFP32(const std::string& modelName, const std::opti ASSERT_EQ(grpc_request.inputs()[0].datatype(), "FP32"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().fp32_contents()) { - ASSERT_EQ(content, i++); + size_t i; + for (i = 0; i < (grpc_request.raw_input_contents()[0].size() / sizeof(float)); i++) { + ASSERT_EQ(((float*)grpc_request.raw_input_contents()[0].data())[i], i); } ASSERT_EQ(i, 4); } @@ -549,9 +670,9 @@ static void assertBinaryInputsFP64(const std::string& modelName, const std::opti ASSERT_EQ(grpc_request.inputs()[0].datatype(), "FP64"); ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 1); ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().fp64_contents()) { - ASSERT_EQ(content, i++); + size_t i; + for (i = 0; i < (grpc_request.raw_input_contents()[0].size() / sizeof(double)); i++) { + ASSERT_EQ(((double*)grpc_request.raw_input_contents()[0].data())[i], i); } ASSERT_EQ(i, 4); } @@ -607,16 +728,6 @@ TEST_F(HttpRestApiHandlerTest, binaryInputsBufferSmallerThanExpected_noBinaryDat ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::REST_BINARY_BUFFER_EXCEEDED); } -TEST_F(HttpRestApiHandlerTest, binaryInputsBytes_noBinaryDataSizeParameter_twoInputs) { - std::string binaryData{0x00, 0x00, 0x00, 0x00}; - std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1],\"datatype\":\"BYTES\"}, {\"name\":\"a\",\"shape\":[1],\"datatype\":\"BYTES\"}]}"; - request_body += binaryData; - - ::KFSRequest grpc_request; - int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); - ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::REST_COULD_NOT_PARSE_INPUT); -} - TEST_F(HttpRestApiHandlerTest, binaryInputsInvalidBinaryDataSizeParameter) { std::string binaryData{0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00}; std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[1,4],\"datatype\":\"INT32\",\"parameters\":{\"binary_data_size\":true}}]}"; @@ -628,36 +739,11 @@ TEST_F(HttpRestApiHandlerTest, binaryInputsInvalidBinaryDataSizeParameter) { } TEST_F(HttpRestApiHandlerTest, binaryInputsINT8BatchSize2) { + // Format with string binary_data_size parameter containing list of sizes is now deprecated std::string binaryData{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; std::string request_body = 
"{\"inputs\":[{\"name\":\"b\",\"shape\":[2,4],\"datatype\":\"INT8\",\"parameters\":{\"binary_data_size\":\"4, 4\"}}]}"; request_body += binaryData; - ::KFSRequest grpc_request; - int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); - ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::OK); - - ASSERT_EQ(grpc_request.inputs_size(), 1); - ASSERT_EQ(grpc_request.model_name(), modelName); - ASSERT_EQ(grpc_request.model_version(), std::to_string(modelVersion.value())); - auto params = grpc_request.parameters(); - ASSERT_EQ(grpc_request.inputs()[0].name(), "b"); - ASSERT_EQ(grpc_request.inputs()[0].datatype(), "INT8"); - - ASSERT_EQ(grpc_request.inputs()[0].shape()[0], 2); - ASSERT_EQ(grpc_request.inputs()[0].shape()[1], 4); - - int i = 0; - for (auto content : grpc_request.inputs()[0].contents().int_contents()) { - ASSERT_EQ(content, i++); - } - ASSERT_EQ(i, 8); -} - -TEST_F(HttpRestApiHandlerTest, binaryInputsBinaryDataSizeStringParameterInvalid) { - std::string binaryData{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; - std::string request_body = "{\"inputs\":[{\"name\":\"b\",\"shape\":[2,4],\"datatype\":\"INT8\",\"parameters\":{\"binary_data_size\":\"a, 4\"}}]}"; - request_body += binaryData; - ::KFSRequest grpc_request; int inferenceHeaderContentLength = (request_body.size() - binaryData.size()); ASSERT_EQ(HttpRestApiHandler::prepareGrpcRequest(modelName, modelVersion, request_body, grpc_request, inferenceHeaderContentLength), ovms::StatusCode::REST_BINARY_DATA_SIZE_PARAMETER_INVALID); diff --git a/src/test/localfilesystem_test.cpp b/src/test/localfilesystem_test.cpp index 48ee2d29f8..9137b887bb 100644 --- a/src/test/localfilesystem_test.cpp +++ b/src/test/localfilesystem_test.cpp @@ -33,7 +33,7 @@ const std::string TMP_CONTENT = "filecontent123\r\n"; const std::string TMP_DIR1 = "dir1"; const std::string TMP_DIR2 = "dir2"; -void createTmpFiles() { +static void createTmpFiles() { std::ofstream configFile(TMP_PATH + TMP_FILE); configFile << TMP_CONTENT << std::endl; configFile.close(); @@ -158,3 +158,101 @@ TEST(FileSystem, CreateTempFolder) { EXPECT_TRUE((p & fs::perms::others_read) == fs::perms::none); EXPECT_TRUE((p & fs::perms::owner_read) != fs::perms::none); } + +TEST(FileSystem, CheckIfPathIsEscaped) { + ASSERT_TRUE(ovms::FileSystem::isPathEscaped("/../")); + ASSERT_TRUE(ovms::FileSystem::isPathEscaped("/..")); + ASSERT_TRUE(ovms::FileSystem::isPathEscaped("../")); + ASSERT_TRUE(!ovms::FileSystem::isPathEscaped("/path/..resnet/")); + ASSERT_TRUE(!ovms::FileSystem::isPathEscaped("/path/resnet../")); +} + +TEST(FileSystem, IsLocalFilesystem) { + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("")); + ASSERT_FALSE(ovms::FileSystem::isLocalFilesystem("s3://")); + ASSERT_FALSE(ovms::FileSystem::isLocalFilesystem("gs://")); + ASSERT_FALSE(ovms::FileSystem::isLocalFilesystem("azfs://")); + ASSERT_FALSE(ovms::FileSystem::isLocalFilesystem("az://")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("nanas3://")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("...gs://")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("/azfs://")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("o_O$az://")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("../")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("/localfilesystem")); + ASSERT_TRUE(ovms::FileSystem::isLocalFilesystem("/long/local/filesystem")); +} + +TEST(FileSystem, SetRootDirectoryPath) { + std::string 
rootPath = ""; + std::string givenPath = "/givenpath"; + + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, "/"); + + givenPath = "/givenpath/longer"; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, "/givenpath/"); + + givenPath = "/givenpath/longer/somefile.txt"; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, "/givenpath/longer/"); + + givenPath = "givenpath"; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + std::string currentWorkingDir = std::filesystem::current_path(); + ASSERT_EQ(rootPath, ovms::FileSystem::joinPath({currentWorkingDir, ""})); + + givenPath = "/givenpath/"; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, givenPath); + + givenPath = "1"; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, ovms::FileSystem::joinPath({currentWorkingDir, ""})); + + givenPath = ""; + ovms::FileSystem::setRootDirectoryPath(rootPath, givenPath); + ASSERT_EQ(rootPath, ovms::FileSystem::joinPath({currentWorkingDir, ""})); +} + +TEST(FileSystem, SetPath) { + std::string rootPath = ""; + std::string testPath = ""; + std::string givenPath = ""; + + try { + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + } catch (std::logic_error& e) { + } + + rootPath = "/rootPath"; + testPath = ""; + givenPath = ""; + + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + ASSERT_EQ(testPath, rootPath); + + testPath = ""; + givenPath = "/givenPath"; + + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + ASSERT_EQ(testPath, "/givenPath"); + + testPath = ""; + givenPath = "givenPath"; + + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + ASSERT_EQ(testPath, "/rootPathgivenPath"); + + testPath = ""; + givenPath = "long/givenPath"; + + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + ASSERT_EQ(testPath, "/rootPathlong/givenPath"); + + testPath = ""; + givenPath = "s3://long/givenPath"; + + ovms::FileSystem::setPath(testPath, givenPath, rootPath); + ASSERT_EQ(testPath, givenPath); +} diff --git a/src/test/mediapipe/config_mediapipe_add_adapter_full.json b/src/test/mediapipe/config_mediapipe_add_adapter_full.json new file mode 100644 index 0000000000..abc2cf49a7 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_add_adapter_full.json @@ -0,0 +1,20 @@ +{ + "model_config_list": [ + {"config": { + "name": "add", + "base_path": "/ovms/src/test/add_two_inputs_model", + "shape": {"input1":"(1, 10)"} + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphaddadapterfull.pbtxt" + }, + { + "name":"mediapipeAdd", + "graph_path":"/ovms/src/test/mediapipe/graphadd.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_all_graphs_adapter_full.json b/src/test/mediapipe/config_mediapipe_all_graphs_adapter_full.json new file mode 100644 index 0000000000..78e95c5199 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_all_graphs_adapter_full.json @@ -0,0 +1,38 @@ +{ + "model_config_list": [ + { + "config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + }, + { + "config": { + "name": "add", + "base_path": "/ovms/src/test/add_two_inputs_model", + "shape": { + "input1": "(1, 10)" + } + } + } + ], + "mediapipe_config_list": [ + { + "name": "mediapipeDummyADAPTFULL", + "graph_path": "/ovms/src/test/mediapipe/graphdummyadapterfull.pbtxt" + }, + { + "name": "mediapipeDummy", + 
"graph_path": "/ovms/src/test/mediapipe/graphdummy.pbtxt" + }, + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphaddadapterfull.pbtxt" + }, + { + "name":"mediapipeAdd", + "graph_path":"/ovms/src/test/mediapipe/graphadd.pbtxt" + } + ] +} \ No newline at end of file diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_default_subconfig.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_default_subconfig.json new file mode 100644 index 0000000000..bffb7f48be --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_default_subconfig.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt", + "base_path":"/ovms/src/test/mediapipe" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full.json new file mode 100644 index 0000000000..3e536fff50 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full.json @@ -0,0 +1,20 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediaDummyADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull.pbtxt" + }, + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json new file mode 100644 index 0000000000..f2e851195d --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json @@ -0,0 +1,46 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediaDummyADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull.pbtxt" + }, + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt" + } + ], + "pipeline_config_list": [ + { + "name": "dummyDAG", + "inputs": ["b"], + "nodes": [ + { + "name": "dummyNode", + "model_name": "dummy", + "type": "DL model", + "inputs": [ + {"b": {"node_name": "request", + "data_item": "b"}} + ], + "outputs": [ + {"data_item": "a", + "alias": "new_dummy_output"} + ] + } + ], + "outputs": [ + {"a": {"node_name": "dummyNode", + "data_item": "new_dummy_output"} + } + ] + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json new file mode 100644 index 0000000000..0dd6fe08d7 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json @@ -0,0 +1,22 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 12)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediaDummyADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull.pbtxt", + "subconfig":"/ovms/src/test/mediapipe/config_standard_dummy.json" + }, + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt", + "subconfig":"/ovms/src/test/mediapipe/config_standard_dummy.json" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_no_graph_path.json 
b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_no_graph_path.json new file mode 100644 index 0000000000..b34e1d915d --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_no_graph_path.json @@ -0,0 +1,20 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"graphdummyadapterfull", + "base_path":"/ovms/src/test/mediapipe" + }, + { + "name":"graphdummy", + "base_path":"/ovms/src/test/mediapipe" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_only_name_specified.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_only_name_specified.json new file mode 100644 index 0000000000..91cd49948d --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_only_name_specified.json @@ -0,0 +1,8 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"graphdummy" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_relative_to_base_path.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_relative_to_base_path.json new file mode 100644 index 0000000000..4deb480c62 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_relative_to_base_path.json @@ -0,0 +1,18 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediaDummy", + "base_path":"/ovms/src/test/mediapipe/relative_paths", + "graph_path":"../graphdummy.pbtxt", + "subconfig":"graph1/subconfig.json" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json new file mode 100644 index 0000000000..b355f40767 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json @@ -0,0 +1,15 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"mediaDummyADAPTFULL", + "graph_path":"/ovms/src/test/mediapipe/graphdummyadapterfull.pbtxt", + "subconfig":"/ovms/src/test/mediapipe/config_standard_dummy.json" + }, + { + "name":"mediaDummy", + "graph_path":"/ovms/src/test/mediapipe/graphdummy.pbtxt", + "subconfig":"/ovms/src/test/mediapipe/config_standard_dummy.json" + } + ] +} diff --git a/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json b/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json new file mode 100644 index 0000000000..3bf821ff68 --- /dev/null +++ b/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + ], + "mediapipe_config_list": [ + { + "name":"mediaWithParams", + "graph_path":"/ovms/src/test/mediapipe/graphWithParams.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/config_standard_add.json b/src/test/mediapipe/config_standard_add.json new file mode 100644 index 0000000000..e9976f5f8f --- /dev/null +++ b/src/test/mediapipe/config_standard_add.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + {"config": { + "name": "add", + "base_path": "/ovms/src/test/add_two_inputs_model", + "shape": {"input1":"(1, 10)"} + } + } + ] +} diff --git a/src/test/mediapipe/config_standard_dummy.json b/src/test/mediapipe/config_standard_dummy.json new file mode 100644 index 0000000000..6fe2321258 --- /dev/null +++ b/src/test/mediapipe/config_standard_dummy.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + {"config": { + "name": 
"dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ] +} diff --git a/src/test/mediapipe/graph.pbtxt b/src/test/mediapipe/graph.pbtxt new file mode 100644 index 0000000000..f6105187cb --- /dev/null +++ b/src/test/mediapipe/graph.pbtxt @@ -0,0 +1,36 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in" +output_stream: "out" +node { + calculator: "OVMSOVCalculator" + input_stream: "B:in" + output_stream: "A:out" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphWithParams.pbtxt b/src/test/mediapipe/graphWithParams.pbtxt new file mode 100644 index 0000000000..182dd07fbe --- /dev/null +++ b/src/test/mediapipe/graphWithParams.pbtxt @@ -0,0 +1,30 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in_not_used" +output_stream: "out_string" +output_stream: "out_int64" +output_stream: "out_bool" +node { + calculator: "InputSidePacketUserTestCalc" + input_side_packet: "INPUT_SIDE_PACKET_STRING:string_param" + input_side_packet: "INPUT_SIDE_PACKET_INT64:int64_param" + input_side_packet: "INPUT_SIDE_PACKET_BOOL:bool_param" + input_stream: "INPUT_FP32:in_not_used" + output_stream: "OUTPUT_UINT8:out_string" + output_stream: "OUTPUT_INT64:out_int64" + output_stream: "OUTPUT_BOOL:out_bool" +} + diff --git a/src/test/mediapipe/graphadd.pbtxt b/src/test/mediapipe/graphadd.pbtxt new file mode 100644 index 0000000000..394e2a0874 --- /dev/null +++ b/src/test/mediapipe/graphadd.pbtxt @@ -0,0 +1,42 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "OVMSOVCalculator" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} diff --git a/src/test/mediapipe/graphaddadapterfull.pbtxt b/src/test/mediapipe/graphaddadapterfull.pbtxt new file mode 100644 index 0000000000..33515625d1 --- /dev/null +++ b/src/test/mediapipe/graphaddadapterfull.pbtxt @@ -0,0 +1,52 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} + diff --git a/src/test/mediapipe/graphdummy.pbtxt b/src/test/mediapipe/graphdummy.pbtxt new file mode 100644 index 0000000000..f6105187cb --- /dev/null +++ b/src/test/mediapipe/graphdummy.pbtxt @@ -0,0 +1,36 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +input_stream: "in" +output_stream: "out" +node { + calculator: "OVMSOVCalculator" + input_stream: "B:in" + output_stream: "A:out" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} diff --git a/src/test/mediapipe/graphdummyadapterfull.pbtxt b/src/test/mediapipe/graphdummyadapterfull.pbtxt new file mode 100644 index 0000000000..acd4a05118 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:in" + output_stream: "A:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} + diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt new file mode 100644 index 0000000000..10878c39d4 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} + diff --git a/src/test/mediapipe/graphdummyadapterfull_dummyinputnames2.pbtxt b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames2.pbtxt new file mode 100644 index 0000000000..10878c39d4 --- /dev/null +++ b/src/test/mediapipe/graphdummyadapterfull_dummyinputnames2.pbtxt @@ -0,0 +1,46 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "custom_dummy_input" +output_stream: "custom_dummy_output" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:custom_dummy_input" + output_stream: "A:custom_dummy_output" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} + diff --git a/src/test/mediapipe/inputsidepacketusertestcalc.cc b/src/test/mediapipe/inputsidepacketusertestcalc.cc new file mode 100644 index 0000000000..5e819f0de2 --- /dev/null +++ b/src/test/mediapipe/inputsidepacketusertestcalc.cc @@ -0,0 +1,114 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include + +#include + +#include "../../stringutils.hpp" +#include "mediapipe/framework/calculator_framework.h" +#include "mediapipe/framework/port/canonical_errors.h" +namespace mediapipe { +#define MLOG(A) LOG(ERROR) << " " << A << std::endl; +using std::endl; + +const std::string ISP_STRING{"INPUT_SIDE_PACKET_STRING"}; +const std::string ISP_INT64{"INPUT_SIDE_PACKET_INT64"}; +const std::string ISP_BOOL{"INPUT_SIDE_PACKET_BOOL"}; +const std::string IN_FP32_TAG{"INPUT_FP32"}; +const std::string INT32_TAG{"OUTPUT_UINT8"}; +const std::string INT64_TAG{"OUTPUT_INT64"}; +const std::string BOOL_TAG{"OUTPUT_BOOL"}; + +class InputSidePacketUserTestCalc : public CalculatorBase { + std::string stringParam; + bool boolParam; + int64_t int64Param; + std::unordered_map outputNameToTag; // TODO move to Open(); + +public: + static absl::Status GetContract(CalculatorContract* cc) { + MLOG("InputSidePacketUserTestCalc GetContract start"); + RET_CHECK(!cc->Inputs().GetTags().empty()); + RET_CHECK(!cc->Outputs().GetTags().empty()); + cc->Inputs().Tag(IN_FP32_TAG).Set(); + cc->Outputs().Tag(INT32_TAG).Set(); + cc->Outputs().Tag(INT64_TAG).Set(); + cc->Outputs().Tag(BOOL_TAG).Set(); + cc->InputSidePackets().Tag(ISP_STRING.c_str()).Set(); + cc->InputSidePackets().Tag(ISP_INT64.c_str()).Set(); + cc->InputSidePackets().Tag(ISP_BOOL.c_str()).Set(); + MLOG("InputSidePacketUserTestCalc GetContract end"); + return absl::OkStatus(); + } + + absl::Status Close(CalculatorContext* cc) final { + MLOG("InputSidePacketUserTestCalc Close"); + return absl::OkStatus(); + } + absl::Status Open(CalculatorContext* cc) final { + MLOG("InputSidePacketUserTestCalc Open start"); + stringParam = cc->InputSidePackets() + .Tag(ISP_STRING.c_str()) + .Get(); + boolParam = cc->InputSidePackets() + .Tag(ISP_BOOL.c_str()) + .Get(); + int64Param = cc->InputSidePackets() + .Tag(ISP_INT64.c_str()) + .Get(); + for (CollectionItemId id = cc->Inputs().BeginId(); + id < cc->Inputs().EndId(); ++id) { + if (!cc->Inputs().Get(id).Header().IsEmpty()) { + cc->Outputs().Get(id).SetHeader(cc->Inputs().Get(id).Header()); + } + } + if (cc->OutputSidePackets().NumEntries() != 0) { + for (CollectionItemId id = cc->InputSidePackets().BeginId(); + id < cc->InputSidePackets().EndId(); ++id) { + cc->OutputSidePackets().Get(id).Set(cc->InputSidePackets().Get(id)); + } + } + cc->SetOffset(TimestampDiff(0)); + MLOG("InputSidePacketUserTestCalc Open end"); + return absl::OkStatus(); + } + + absl::Status Process(CalculatorContext* cc) final { + MLOG("InputSidePacketUserTestCalc process start"); + if (cc->Inputs().NumEntries() == 0) { + return tool::StatusStop(); + } + ov::Tensor* intTensor = new ov::Tensor(ov::element::Type_t::i64, {1}); + cc->Outputs().Tag(INT64_TAG).Add(intTensor, cc->InputTimestamp()); + *((int64_t*)intTensor->data()) = int64Param; + std::cout << intTensor->get_byte_size() << std::endl; + ov::Tensor* boolTensor = new ov::Tensor(ov::element::Type_t::boolean, {1}); + *((bool*)boolTensor->data()) = boolParam; + std::cout << boolTensor->get_byte_size() << std::endl; + cc->Outputs().Tag(BOOL_TAG).Add(boolTensor, cc->InputTimestamp()); + // there is no string to/from tensor in ovms + ov::Tensor* stringTensor = new ov::Tensor(ov::element::Type_t::u8, {stringParam.size()}); + std::memcpy(stringTensor->data(), stringParam.data(), stringParam.size()); + cc->Outputs().Tag(INT32_TAG).Add(stringTensor, cc->InputTimestamp()); + 
MLOG("InputSidePacketUserTestCalc process end"); + return absl::OkStatus(); + } +}; + +REGISTER_CALCULATOR(InputSidePacketUserTestCalc); +} // namespace mediapipe diff --git a/src/test/mediapipe/relative_paths/config_relative_add_subconfig.json b/src/test/mediapipe/relative_paths/config_relative_add_subconfig.json new file mode 100644 index 0000000000..d10e01fc51 --- /dev/null +++ b/src/test/mediapipe/relative_paths/config_relative_add_subconfig.json @@ -0,0 +1,15 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"graph1/graphaddadapterfull.pbtxt", + "subconfig":"graph1/subconfig.json" + }, + { + "name":"mediapipeAddADAPT", + "graph_path":"graph2/graphadd.pbtxt", + "subconfig":"graph2/subconfig.json" + } + ] +} diff --git a/src/test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json b/src/test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json new file mode 100644 index 0000000000..de8b6df921 --- /dev/null +++ b/src/test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json @@ -0,0 +1,15 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"graph1/graphaddadapterfull.pbtxt", + "subconfig":"graph3/subconfig.json" + }, + { + "name":"mediapipeAddADAPT", + "graph_path":"graph2/graphadd.pbtxt", + "subconfig":"graph4/subconfig.json" + } + ] +} diff --git a/src/test/mediapipe/relative_paths/config_relative_dummy.json b/src/test/mediapipe/relative_paths/config_relative_dummy.json new file mode 100644 index 0000000000..628445d22e --- /dev/null +++ b/src/test/mediapipe/relative_paths/config_relative_dummy.json @@ -0,0 +1,25 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy1", + "base_path": "graph1/dummy1", + "shape": "(1, 10)" + }, + "config": { + "name": "dummy2", + "base_path": "graph2/dummy2", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"graph1/graphaddadapterfull.pbtxt" + }, + { + "name":"mediapipeAddADAPT", + "graph_path":"graph2/graphadd.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/relative_paths/config_relative_dummy_negative.json b/src/test/mediapipe/relative_paths/config_relative_dummy_negative.json new file mode 100644 index 0000000000..56feef1041 --- /dev/null +++ b/src/test/mediapipe/relative_paths/config_relative_dummy_negative.json @@ -0,0 +1,25 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy1", + "base_path": "dummy1", + "shape": "(1, 10)" + }, + "config": { + "name": "dummy2", + "base_path": "dummy2", + "shape": "(1, 10)" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediapipeAddADAPTFULL", + "graph_path":"graph3/graphaddadapterfull.pbtxt" + }, + { + "name":"mediapipeAddADAPT", + "graph_path":"graph4/graphaddadapter.pbtxt" + } + ] +} diff --git a/src/test/mediapipe/relative_paths/config_relative_dummy_subconfig_base_path.json b/src/test/mediapipe/relative_paths/config_relative_dummy_subconfig_base_path.json new file mode 100644 index 0000000000..5c55fc99b4 --- /dev/null +++ b/src/test/mediapipe/relative_paths/config_relative_dummy_subconfig_base_path.json @@ -0,0 +1,13 @@ +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"graphaddadapterfull", + "base_path":"graph1" + }, + { + "name":"graphadd", + "base_path":"graph2" + } + ] +} diff --git a/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.bin b/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.bin new file 
mode 100644 index 0000000000..9c12e8adf9 Binary files /dev/null and b/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.bin differ diff --git a/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.xml b/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.xml new file mode 100644 index 0000000000..8df1493123 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph1/dummy1/1/dummy.xml @@ -0,0 +1,99 @@ + + + + + + + + 1 + 10 + + + + + + + + 1 + 1 + + + + + + + 1 + 10 + + + 1 + 1 + + + + + 1 + 10 + + + + + + + 1 + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/mediapipe/relative_paths/graph1/graph.pbtxt b/src/test/mediapipe/relative_paths/graph1/graph.pbtxt new file mode 100644 index 0000000000..33515625d1 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph1/graph.pbtxt @@ -0,0 +1,52 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} + diff --git a/src/test/mediapipe/relative_paths/graph1/graphaddadapterfull.pbtxt b/src/test/mediapipe/relative_paths/graph1/graphaddadapterfull.pbtxt new file mode 100644 index 0000000000..33515625d1 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph1/graphaddadapterfull.pbtxt @@ -0,0 +1,52 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
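+#
+# Copy of the adapter-full "add" graph stored under relative_paths/graph1 so that
+# the config_relative_* configurations above can reference it through a relative
+# graph_path.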
+# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} + diff --git a/src/test/mediapipe/relative_paths/graph1/subconfig.json b/src/test/mediapipe/relative_paths/graph1/subconfig.json new file mode 100644 index 0000000000..8ea822ebec --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph1/subconfig.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy1", + "base_path": "dummy1", + "shape": "(1, 10)" + } + } + ] +} \ No newline at end of file diff --git a/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.bin b/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.bin new file mode 100644 index 0000000000..9c12e8adf9 Binary files /dev/null and b/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.bin differ diff --git a/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.xml b/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.xml new file mode 100644 index 0000000000..8df1493123 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph2/dummy2/1/dummy.xml @@ -0,0 +1,99 @@ + + + + + + + + 1 + 10 + + + + + + + + 1 + 1 + + + + + + + 1 + 10 + + + 1 + 1 + + + + + 1 + 10 + + + + + + + 1 + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/mediapipe/relative_paths/graph2/graph.pbtxt b/src/test/mediapipe/relative_paths/graph2/graph.pbtxt new file mode 100644 index 0000000000..394e2a0874 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph2/graph.pbtxt @@ -0,0 +1,42 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "OVMSOVCalculator" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} diff --git a/src/test/mediapipe/relative_paths/graph2/graphadd.pbtxt b/src/test/mediapipe/relative_paths/graph2/graphadd.pbtxt new file mode 100644 index 0000000000..394e2a0874 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph2/graphadd.pbtxt @@ -0,0 +1,42 @@ +# +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +input_stream: "in1" +input_stream: "in2" +output_stream: "out" +node { + calculator: "OVMSOVCalculator" + input_stream: "INPUT1:in1" + input_stream: "INPUT2:in2" + output_stream: "SUM:out" + node_options: { + [type.googleapis.com / mediapipe.OVMSCalculatorOptions]: { + servable_name: "add" + servable_version: "1" + tag_to_input_tensor_names { + key: "INPUT1" + value: "input1" + } + tag_to_input_tensor_names { + key: "INPUT2" + value: "input2" + } + tag_to_output_tensor_names { + key: "SUM" + value: "sum" + } + } + } +} diff --git a/src/test/mediapipe/relative_paths/graph2/subconfig.json b/src/test/mediapipe/relative_paths/graph2/subconfig.json new file mode 100644 index 0000000000..b153edfac7 --- /dev/null +++ b/src/test/mediapipe/relative_paths/graph2/subconfig.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy2", + "base_path": "dummy2", + "shape": "(1, 10)" + } + } + ] +} \ No newline at end of file diff --git a/src/test/mediapipe/subconfig.json b/src/test/mediapipe/subconfig.json new file mode 100644 index 0000000000..6fe2321258 --- /dev/null +++ b/src/test/mediapipe/subconfig.json @@ -0,0 +1,10 @@ +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy", + "shape": "(1, 10)" + } + } + ] +} diff --git a/src/test/mediapipe_validation_test.cpp b/src/test/mediapipe_validation_test.cpp new file mode 100644 index 0000000000..6f357400d0 --- /dev/null +++ b/src/test/mediapipe_validation_test.cpp @@ -0,0 +1,176 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include + +#include +#include + +#include "../grpcservermodule.hpp" +#include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_internal/mediapipegraphexecutor.hpp" +#include "../precision.hpp" +#include "../server.hpp" +#include "test_utils.hpp" + +using namespace ovms; + +class MediapipeValidationTest : public ::testing::Test { +public: + static std::unique_ptr thread; + + KFSInferenceServiceImpl* impl; + ::KFSRequest request; + ::KFSResponse response; + + const Precision precision = Precision::FP32; + static void SetUpServer(const char* configPath) { + ovms::Server& server = ovms::Server::instance(); + server.setShutdownRequest(0); + std::string port = "9187"; + randomizePort(port); + char* argv[] = {(char*)"ovms", + (char*)"--config_path", + (char*)configPath, + (char*)"--port", + (char*)port.c_str()}; + int argc = 5; + thread.reset(new std::thread([&argc, &argv, &server]() { + EXPECT_EQ(EXIT_SUCCESS, server.start(argc, argv)); + })); + auto start = std::chrono::high_resolution_clock::now(); + while ((server.getModuleState(SERVABLE_MANAGER_MODULE_NAME) != ovms::ModuleState::INITIALIZED) && + (!server.isReady()) && + (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < 5)) { + } + } + void prepareSingleInput() { + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {DUMMY_MODEL_SHAPE, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign("mediapipeDummyADAPTFULL"); + } + void prepareDoubleInput() { + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{ + {"in1", {DUMMY_MODEL_SHAPE, precision}}, + {"in2", {DUMMY_MODEL_SHAPE, precision}}}; + std::vector requestData1{0., 0., 0., 0., 0., 0., 0, 0., 0., 0}; + std::vector requestData2{0., 0., 0., 0., 0., 0., 0, 0., 0., 0}; + preparePredictRequest(request, inputsMeta, requestData1); + request.mutable_model_name()->assign("mediapipeAddADAPTFULL"); + } + void SetUp() override { + impl = &dynamic_cast( + ovms::Server::instance().getModule(ovms::GRPC_SERVER_MODULE_NAME)) + ->getKFSGrpcImpl(); + } + void TearDown() { + impl = nullptr; + } + static void SetUpTestSuite() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_all_graphs_adapter_full.json"); + } + static void TearDownTestSuite() { + ovms::Server::instance().setShutdownRequest(1); + thread->join(); + ovms::Server::instance().setShutdownRequest(0); + } +}; + +std::unique_ptr MediapipeValidationTest::thread = nullptr; + +TEST_F(MediapipeValidationTest, Ok1Input) { + prepareSingleInput(); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); +} + +TEST_F(MediapipeValidationTest, Ok2Inputs) { + prepareDoubleInput(); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); +} + +TEST_F(MediapipeValidationTest, TooManyInputs) { + prepareSingleInput(); + request.add_inputs()->CopyFrom(request.inputs(0)); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, NotEnoughInputs) { + prepareSingleInput(); + request.clear_inputs(); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, 
MultipleInputsSameName) { + prepareDoubleInput(); + request.mutable_inputs(1)->set_name("in1"); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, InputWithUnexpectedName) { + prepareDoubleInput(); + request.mutable_inputs(1)->set_name("in3"); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, DataInNonRawField) { + prepareSingleInput(); + request.mutable_raw_input_contents()->Clear(); + for (int i = 0; i < 10; i++) + request.mutable_inputs(0)->mutable_contents()->mutable_fp32_contents()->Add(); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, NoDataInRawField) { + prepareSingleInput(); + request.mutable_raw_input_contents()->Clear(); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, NegativeShape) { + prepareSingleInput(); + request.mutable_inputs(0)->mutable_shape()->Set(0, -1); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, ZeroShape) { + prepareSingleInput(); + request.mutable_inputs(0)->mutable_shape()->Set(0, 0); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, BufferShorterThanExpected) { + prepareSingleInput(); + request.mutable_inputs(0)->mutable_shape()->Set(0, 20); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, BufferLargerThanExpected) { + prepareSingleInput(); + request.mutable_inputs(0)->mutable_shape()->Set(1, 1); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} + +TEST_F(MediapipeValidationTest, WrongPrecision) { + prepareSingleInput(); + request.mutable_inputs(0)->set_datatype("unknown"); + ASSERT_EQ(impl->ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); +} diff --git a/src/test/mediapipeflow_test.cpp b/src/test/mediapipeflow_test.cpp new file mode 100644 index 0000000000..4d1b3d4f23 --- /dev/null +++ b/src/test/mediapipeflow_test.cpp @@ -0,0 +1,988 @@ +//***************************************************************************** +// Copyright 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../config.hpp" +#include "../grpcservermodule.hpp" +#include "../http_rest_api_handler.hpp" +#include "../kfs_frontend/kfs_grpc_inference_service.hpp" +#include "../mediapipe_calculators/modelapiovmsadapter.hpp" +#include "../mediapipe_internal/mediapipefactory.hpp" +#include "../mediapipe_internal/mediapipegraphdefinition.hpp" +#include "../metric_config.hpp" +#include "../metric_module.hpp" +#include "../model_service.hpp" +#include "../precision.hpp" +#include "../servablemanagermodule.hpp" +#include "../server.hpp" +#include "../shape.hpp" +#include "../stringutils.hpp" +#include "../tfs_frontend/tfs_utils.hpp" +#include "c_api_test_utils.hpp" +#include "test_utils.hpp" + +using namespace ovms; + +using testing::HasSubstr; +using testing::Not; + +class MediapipeFlowTest : public ::testing::TestWithParam { +protected: + ovms::Server& server = ovms::Server::instance(); + + const Precision precision = Precision::FP32; + std::unique_ptr t; + std::string port = "9178"; + void SetUpServer(const char* configPath) { + server.setShutdownRequest(0); + randomizePort(this->port); + char* argv[] = {(char*)"ovms", + (char*)"--config_path", + (char*)configPath, + (char*)"--port", + (char*)port.c_str()}; + int argc = 5; + t.reset(new std::thread([&argc, &argv, this]() { + EXPECT_EQ(EXIT_SUCCESS, server.start(argc, argv)); + })); + auto start = std::chrono::high_resolution_clock::now(); + while ((server.getModuleState(SERVABLE_MANAGER_MODULE_NAME) != ovms::ModuleState::INITIALIZED) && + (!server.isReady()) && + (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < 5)) { + } + } + + void SetUp() override { + } + void TearDown() { + server.setShutdownRequest(1); + t->join(); + server.setShutdownRequest(0); + } +}; + +class MediapipeFlowAddTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json"); + } +}; +class MediapipeFlowDummyTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full.json"); + } +}; +class MediapipeFlowDummyPathsRelativeToBasePathTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full_relative_to_base_path.json"); + } +}; + +class MediapipeFlowDummyNoGraphPathTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full_no_graph_path.json"); + } +}; + +class MediapipeFlowDummyOnlyGraphNameSpecified : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full_only_name_specified.json"); + } +}; + +class MediapipeFlowDummySubconfigTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json"); + } +}; + +class MediapipeFlowDummyDefaultSubconfigTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_default_subconfig.json"); + } +}; + +static void performMediapipeInferTest(const ovms::Server& server, ::KFSRequest& request, ::KFSResponse& response, const Precision& precision, const std::string& modelName) { + const ovms::Module* grpcModule = 
server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {DUMMY_MODEL_SHAPE, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + auto outputs = response.outputs(); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0].name(), "out"); + ASSERT_EQ(outputs[0].shape().size(), 2); + ASSERT_EQ(outputs[0].shape()[0], 1); + ASSERT_EQ(outputs[0].shape()[1], 10); +} + +TEST_F(MediapipeFlowDummyOnlyGraphNameSpecified, Infer) { + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "graphdummy"; + performMediapipeInferTest(server, request, response, precision, modelName); + + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_F(MediapipeFlowDummyDefaultSubconfigTest, Infer) { + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "mediaDummy"; + performMediapipeInferTest(server, request, response, precision, modelName); + + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_F(MediapipeFlowDummyPathsRelativeToBasePathTest, Infer) { + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "mediaDummy"; + performMediapipeInferTest(server, request, response, precision, modelName); + + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_F(MediapipeFlowDummySubconfigTest, Infer) { + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "mediaDummy"; + performMediapipeInferTest(server, request, response, precision, modelName); + + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +class MediapipeFlowDummyDummyInSubconfigAndConfigTest : public MediapipeFlowTest { +public: + void SetUp() { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json"); + } +}; + +TEST_F(MediapipeFlowDummyDummyInSubconfigAndConfigTest, Infer) { + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + + const std::string modelName = "mediaDummy"; + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {{1, 12}, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + auto outputs = response.outputs(); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0].name(), "out"); + ASSERT_EQ(outputs[0].shape().size(), 2); + ASSERT_EQ(outputs[0].shape()[0], 1); + ASSERT_EQ(outputs[0].shape()[1], 12); +} + +TEST_F(MediapipeFlowDummyNoGraphPathTest, Infer) { + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + + const 
std::string modelName = "graphdummy"; + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {DUMMY_MODEL_SHAPE, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + // TODO validate output + auto outputs = response.outputs(); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0].name(), "out"); + ASSERT_EQ(outputs[0].shape().size(), 2); + ASSERT_EQ(outputs[0].shape()[0], 1); + ASSERT_EQ(outputs[0].shape()[1], 10); + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_P(MediapipeFlowDummyTest, Infer) { + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + + const std::string modelName = GetParam(); + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {DUMMY_MODEL_SHAPE, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + // TODO validate output + auto outputs = response.outputs(); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0].name(), "out"); + ASSERT_EQ(outputs[0].shape().size(), 2); + ASSERT_EQ(outputs[0].shape()[0], 1); + ASSERT_EQ(outputs[0].shape()[1], 10); + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_P(MediapipeFlowAddTest, Infer) { + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = GetParam(); + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{ + {"in1", {DUMMY_MODEL_SHAPE, precision}}, + {"in2", {DUMMY_MODEL_SHAPE, precision}}}; + std::vector requestData1{0., 0., 0., 0., 0., 0., 0, 0., 0., 0}; + std::vector requestData2{0., 0., 0., 0., 0., 0., 0, 0., 0., 0}; + preparePredictRequest(request, inputsMeta, requestData1); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + auto outputs = response.outputs(); + ASSERT_EQ(outputs.size(), 1); + ASSERT_EQ(outputs[0].name(), "out"); + ASSERT_EQ(outputs[0].shape().size(), 2); + ASSERT_EQ(outputs[0].shape()[0], 1); + ASSERT_EQ(outputs[0].shape()[1], 10); + checkAddResponse("out", requestData1, requestData2, request, response, 1, 1, modelName); +} + +TEST_F(MediapipeFlowTest, InferWithParams) { + SetUpServer("/ovms/src/test/mediapipe/config_mediapipe_graph_with_side_packets.json"); + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "mediaWithParams"; + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{ + {"in_not_used", {{1, 1}, ovms::Precision::I32}}}; + std::vector requestData{0.}; + preparePredictRequest(request, inputsMeta, requestData); + request.mutable_model_name()->assign(modelName); + // here add params + const std::string 
stringParamValue = "abecadlo"; + const bool boolParamValue = true; + const int64_t int64ParamValue = 42; + request.mutable_parameters()->operator[]("string_param").set_string_param(stringParamValue); + request.mutable_parameters()->operator[]("bool_param").set_bool_param(boolParamValue); + request.mutable_parameters()->operator[]("int64_param").set_int64_param(int64ParamValue); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + auto outputs = response.outputs(); + // here check outputs + ASSERT_EQ(outputs.size(), 3); + // 1st string + auto it = response.outputs().begin(); + size_t outputId = 0; + while (it != response.outputs().end()) { + if (it->name() != "out_string") { + ++it; + ++outputId; + continue; + } + ASSERT_EQ(it->datatype(), "UINT8"); + ASSERT_EQ(it->shape_size(), 1); + ASSERT_EQ(it->shape(0), stringParamValue.size()); + const std::string& content = response.raw_output_contents(outputId); + SPDLOG_ERROR("Received output size:{} content:{}", content.size(), content); + EXPECT_EQ(content, stringParamValue); + break; + } + ASSERT_NE(it, response.outputs().end()); + it = response.outputs().begin(); + outputId = 0; + while (it != response.outputs().end()) { + if (it->name() != "out_bool") { + ++it; + ++outputId; + continue; + } + ASSERT_EQ(it->datatype(), "BOOL"); + ASSERT_EQ(it->shape_size(), 1); + ASSERT_EQ(it->shape(0), 1); + const std::string& content = response.raw_output_contents(outputId); + ASSERT_EQ(content.size(), sizeof(bool)); + const bool castContent = *((bool*)content.data()); + SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + EXPECT_EQ(castContent, boolParamValue); + break; + } + ASSERT_NE(it, response.outputs().end()); + it = response.outputs().begin(); + outputId = 0; + while (it != response.outputs().end()) { + if (it->name() != "out_int64") { + ++it; + ++outputId; + continue; + } + ASSERT_EQ(it->datatype(), "INT64"); + ASSERT_EQ(it->shape_size(), 1); + ASSERT_EQ(it->shape(0), 1); + const std::string& content = response.raw_output_contents(outputId); + ASSERT_EQ(content.size(), sizeof(int64_t)); + const int64_t castContent = *((int64_t*)content.data()); + SPDLOG_ERROR("Received output size:{} content:{}; castContent:{}", content.size(), content, castContent); + EXPECT_EQ(castContent, int64ParamValue); + break; + } + ASSERT_NE(it, response.outputs().end()); +} + +using testing::ElementsAre; + +TEST_P(MediapipeFlowAddTest, AdapterMetadata) { + const std::string modelName = "add"; + mediapipe::ovms::OVMSInferenceAdapter adapter(modelName); + const std::shared_ptr model; + ov::Core unusedCore; + ov::AnyMap notUsedAnyMap; + adapter.loadModel(model, unusedCore, "NOT_USED", notUsedAnyMap); + EXPECT_THAT(adapter.getInputNames(), ElementsAre(SUM_MODEL_INPUT_NAME_1, SUM_MODEL_INPUT_NAME_2)); + EXPECT_THAT(adapter.getOutputNames(), ElementsAre(SUM_MODEL_OUTPUT_NAME)); + EXPECT_EQ(adapter.getInputShape(SUM_MODEL_INPUT_NAME_1), ov::Shape({1, 10})); + EXPECT_EQ(adapter.getInputShape(SUM_MODEL_INPUT_NAME_2), ov::Shape({1, 10})); +} + +namespace { +class MockModelInstance : public ovms::ModelInstance { +public: + MockModelInstance(ov::Core& ieCore) : + ModelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, ieCore) { + } + ov::AnyMap getRTInfo() const override { + std::vector mockLabels; + for (size_t i = 0; i < 5; i++) { + mockLabels.emplace_back(std::to_string(i)); + } + ov::AnyMap configuration = { + {"layout", "data:HWCN"}, + {"resize_type", "unnatural"}, + {"labels", 
mockLabels}}; + return configuration; + } +}; + +class MockModel : public ovms::Model { +public: + MockModel(const std::string& name) : + Model(name, false /*stateful*/, nullptr) {} + std::shared_ptr modelInstanceFactory(const std::string& modelName, const ovms::model_version_t, ov::Core& ieCore, ovms::MetricRegistry* registry = nullptr, const ovms::MetricConfig* metricConfig = nullptr) override { + return std::make_shared(ieCore); + } +}; + +class MockModelManager : public ovms::ModelManager { + ovms::MetricRegistry registry; + +public: + std::shared_ptr modelFactory(const std::string& name, const bool isStateful) override { + return std::make_shared(name); + } + +public: + MockModelManager(const std::string& modelCacheDirectory = "") : + ovms::ModelManager(modelCacheDirectory, ®istry) { + } + ~MockModelManager() { + spdlog::info("Destructor of modelmanager(Enabled one). Models #:{}", models.size()); + join(); + spdlog::info("Destructor of modelmanager(Enabled one). Models #:{}", models.size()); + models.clear(); + spdlog::info("Destructor of modelmanager(Enabled one). Models #:{}", models.size()); + } +}; + +class MockedServableManagerModule : public ovms::ServableManagerModule { + mutable MockModelManager mockModelManager; + +public: + MockedServableManagerModule(ovms::Server& ovmsServer) : + ovms::ServableManagerModule(ovmsServer) { + } + ModelManager& getServableManager() const override { + return mockModelManager; + } +}; + +class MockedServer : public Server { +public: + MockedServer() = default; + std::unique_ptr createModule(const std::string& name) override { + if (name != ovms::SERVABLE_MANAGER_MODULE_NAME) + return Server::createModule(name); + return std::make_unique(*this); + }; + Module* getModule(const std::string& name) { + return const_cast(Server::getModule(name)); + } +}; + +} // namespace + +TEST(Mediapipe, AdapterRTInfo) { + MockedServer server; + OVMS_Server* cserver = reinterpret_cast(&server); + OVMS_ServerSettings* serverSettings = nullptr; + OVMS_ModelsSettings* modelsSettings = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsNew(&serverSettings)); + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsNew(&modelsSettings)); + std::string port{"5555"}; + randomizePort(port); + uint32_t portNum = ovms::stou32(port).value(); + ASSERT_CAPI_STATUS_NULL(OVMS_ServerSettingsSetGrpcPort(serverSettings, portNum)); + // we will use dummy model that will have mocked rt_info + ASSERT_CAPI_STATUS_NULL(OVMS_ModelsSettingsSetConfigPath(modelsSettings, "/ovms/src/test/c_api/config.json")); + + ASSERT_CAPI_STATUS_NULL(OVMS_ServerStartFromConfigurationFile(cserver, serverSettings, modelsSettings)); + const std::string mockedModelName = "dummy"; + uint32_t servableVersion = 1; + mediapipe::ovms::OVMSInferenceAdapter adapter(mockedModelName, servableVersion, cserver); + const std::shared_ptr model; + /*ov::AnyMap configuration = { + {"layout", "data:HWCN"}, + {"resize_type", "unnatural"}, + {"labels", mockLabels}*/ + ov::Core unusedCore; + ov::AnyMap notUsedAnyMap; + adapter.loadModel(model, unusedCore, "NOT_USED", notUsedAnyMap); + ov::AnyMap modelConfig = adapter.getModelConfig(); + auto checkModelInfo = [](const ov::AnyMap& modelConfig) { + ASSERT_EQ(modelConfig.size(), 3); + auto it = modelConfig.find("resize_type"); + ASSERT_NE(modelConfig.end(), it); + EXPECT_EQ(std::string("unnatural"), it->second.as()); + it = modelConfig.find("layout"); + ASSERT_NE(modelConfig.end(), it); + ASSERT_EQ(std::string("data:HWCN"), it->second.as()); + it = modelConfig.find("labels"); + 
ASSERT_NE(modelConfig.end(), it); + const std::vector& resultLabels = it->second.as>(); + EXPECT_THAT(resultLabels, ElementsAre("0", "1", "2", "3", "4")); + }; + checkModelInfo(modelConfig); + + OVMS_ServableMetadata* servableMetadata = nullptr; + ASSERT_CAPI_STATUS_NULL(OVMS_GetServableMetadata(cserver, mockedModelName.c_str(), servableVersion, &servableMetadata)); + + const ov::AnyMap* servableMetadataRtInfo; + ASSERT_CAPI_STATUS_NULL(OVMS_ServableMetadataGetInfo(servableMetadata, reinterpret_cast(&servableMetadataRtInfo))); + ASSERT_NE(nullptr, servableMetadataRtInfo); + checkModelInfo(*servableMetadataRtInfo); + OVMS_ServableMetadataDelete(servableMetadata); +} + +TEST(Mediapipe, MetadataDummy) { + ConstructorEnabledModelManager manager; + ovms::MediapipeGraphConfig mgc{"mediaDummy", "", "/ovms/src/test/mediapipe/graphdummy.pbtxt"}; + ovms::MediapipeGraphDefinition mediapipeDummy("mediaDummy", mgc); + ASSERT_EQ(mediapipeDummy.validate(manager), StatusCode::OK); + tensor_map_t inputs = mediapipeDummy.getInputsInfo(); + tensor_map_t outputs = mediapipeDummy.getOutputsInfo(); + ASSERT_EQ(inputs.size(), 1); + ASSERT_EQ(outputs.size(), 1); + ASSERT_NE(inputs.find("in"), inputs.end()); + ASSERT_NE(outputs.find("out"), outputs.end()); + const auto& input = inputs.at("in"); + EXPECT_EQ(input->getShape(), Shape({})); + EXPECT_EQ(input->getPrecision(), ovms::Precision::UNDEFINED); + const auto& output = outputs.at("out"); + EXPECT_EQ(output->getShape(), Shape({})); + EXPECT_EQ(output->getPrecision(), ovms::Precision::UNDEFINED); +} + +const std::vector mediaGraphsDummy{"mediaDummy", + "mediaDummyADAPTFULL"}; +const std::vector mediaGraphsAdd{"mediapipeAdd", + "mediapipeAddADAPTFULL"}; + +class MediapipeConfig : public MediapipeFlowTest { +public: + void TearDown() override {} +}; + +const std::string NAME = "Name"; +TEST_F(MediapipeConfig, MediapipeGraphDefinitionNonExistentFile) { + ConstructorEnabledModelManager manager; + MediapipeGraphConfig mgc{"noname", "/ovms/NONEXISTENT_FILE"}; + MediapipeGraphDefinition mgd(NAME, mgc); + EXPECT_EQ(mgd.validate(manager), StatusCode::FILE_INVALID); +} + +TEST_F(MediapipeConfig, MediapipeAdd) { + ConstructorEnabledModelManager manager; + auto status = manager.startFromFile("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json"); + EXPECT_EQ(status, ovms::StatusCode::OK); + + for (auto& graphName : mediaGraphsAdd) { + auto graphDefinition = manager.getMediapipeFactory().findDefinitionByName(graphName); + EXPECT_NE(graphDefinition, nullptr); + EXPECT_EQ(graphDefinition->getStatus().isAvailable(), true); + } + + manager.join(); +} + +class MediapipeNoTagMapping : public TestWithTempDir { +protected: + ovms::Server& server = ovms::Server::instance(); + const Precision precision = Precision::FP32; + std::unique_ptr t; + std::string port = "9178"; + void SetUpServer(const char* configPath) { + server.setShutdownRequest(0); + randomizePort(this->port); + char* argv[] = {(char*)"ovms", + (char*)"--config_path", + (char*)configPath, + (char*)"--port", + (char*)port.c_str()}; + int argc = 5; + t.reset(new std::thread([&argc, &argv, this]() { + EXPECT_EQ(EXIT_SUCCESS, server.start(argc, argv)); + })); + auto start = std::chrono::high_resolution_clock::now(); + while ((server.getModuleState(SERVABLE_MANAGER_MODULE_NAME) != ovms::ModuleState::INITIALIZED) && + (!server.isReady()) && + (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < 5)) { + } + } + void TearDown() { + server.setShutdownRequest(1); + t->join(); + 
server.setShutdownRequest(0); + TestWithTempDir::TearDown(); + } +}; + +TEST_F(MediapipeNoTagMapping, DummyUppercase) { + // Here we use dummy with uppercase input/output + // and we shouldn't need tag mapping + ConstructorEnabledModelManager manager; + // create config file + std::string configJson = R"( +{ + "model_config_list": [ + {"config": { + "name": "dummyUpper", + "base_path": "/ovms/src/test/dummyUppercase" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediapipeDummyUppercase", + "graph_path":"PATH_TO_REPLACE" + } + ] +})"; + const std::string pathToReplace = "PATH_TO_REPLACE"; + auto it = configJson.find(pathToReplace); + ASSERT_NE(it, std::string::npos); + const std::string graphPbtxt = R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummyUpper" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:in" + output_stream: "A:out" +})"; + const std::string pbtxtPath = this->directoryPath + "/graphDummyUppercase.pbtxt"; + createConfigFileWithContent(graphPbtxt, pbtxtPath); + configJson.replace(it, pathToReplace.size(), pbtxtPath); + + const std::string configJsonPath = this->directoryPath + "/subconfig.json"; + createConfigFileWithContent(configJson, configJsonPath); + this->SetUpServer(configJsonPath.c_str()); + // INFER + const ovms::Module* grpcModule = server.getModule(ovms::GRPC_SERVER_MODULE_NAME); + KFSInferenceServiceImpl& impl = dynamic_cast(grpcModule)->getKFSGrpcImpl(); + ::KFSRequest request; + ::KFSResponse response; + const std::string modelName = "mediapipeDummyUppercase"; + request.Clear(); + response.Clear(); + inputs_info_t inputsMeta{{"in", {DUMMY_MODEL_SHAPE, precision}}}; + preparePredictRequest(request, inputsMeta); + request.mutable_model_name()->assign(modelName); + ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); + std::vector requestData{0., 0., 0, 0., 0., 0., 0., 0, 0., 0.}; + checkDummyResponse("out", requestData, request, response, 1, 1, modelName); +} + +TEST_F(MediapipeConfig, MediapipeFullRelativePaths) { + ConstructorEnabledModelManager manager; + auto status = manager.startFromFile("/ovms/src/test/mediapipe/relative_paths/config_relative_dummy.json"); + EXPECT_EQ(status, ovms::StatusCode::OK); + + auto definitionAdd = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPT"); + EXPECT_NE(definitionAdd, nullptr); + EXPECT_EQ(definitionAdd->getStatus().isAvailable(), true); + + auto definitionFull = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPTFULL"); + EXPECT_NE(definitionFull, nullptr); + EXPECT_EQ(definitionFull->getStatus().isAvailable(), true); + + manager.join(); +} + +TEST_F(MediapipeConfig, MediapipeFullRelativePathsSubconfig) { + ConstructorEnabledModelManager manager; + auto status = manager.startFromFile("/ovms/src/test/mediapipe/relative_paths/config_relative_add_subconfig.json"); + EXPECT_EQ(status, ovms::StatusCode::OK); + + auto definitionFull = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPTFULL"); + EXPECT_NE(definitionFull, nullptr); + EXPECT_EQ(definitionFull->getStatus().isAvailable(), true); + auto model = manager.findModelByName("dummy1"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + 
ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + + auto definitionAdd = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPT"); + EXPECT_NE(definitionAdd, nullptr); + EXPECT_EQ(definitionAdd->getStatus().isAvailable(), true); + model = manager.findModelByName("dummy2"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + + manager.join(); +} + +TEST_F(MediapipeConfig, MediapipeFullRelativePathsSubconfigBasePath) { + ConstructorEnabledModelManager manager; + auto status = manager.startFromFile("/ovms/src/test/mediapipe/relative_paths/config_relative_dummy_subconfig_base_path.json"); + EXPECT_EQ(status, ovms::StatusCode::OK); + + auto definitionFull = manager.getMediapipeFactory().findDefinitionByName("graphaddadapterfull"); + EXPECT_NE(definitionFull, nullptr); + EXPECT_EQ(definitionFull->getStatus().isAvailable(), true); + auto model = manager.findModelByName("dummy1"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + + auto definitionAdd = manager.getMediapipeFactory().findDefinitionByName("graphadd"); + EXPECT_NE(definitionAdd, nullptr); + EXPECT_EQ(definitionAdd->getStatus().isAvailable(), true); + model = manager.findModelByName("dummy2"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + + manager.join(); +} + +TEST_F(MediapipeConfig, MediapipeFullRelativePathsNegative) { + ConstructorEnabledModelManager manager; + auto status = manager.startFromFile("/ovms/src/test/mediapipe/relative_paths/config_relative_dummy_negative.json"); + EXPECT_EQ(status, ovms::StatusCode::OK); + + auto definitionAdd = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPT"); + EXPECT_NE(definitionAdd, nullptr); + EXPECT_EQ(definitionAdd->getStatus().isAvailable(), false); + + auto definitionFull = manager.getMediapipeFactory().findDefinitionByName("mediapipeAddADAPTFULL"); + EXPECT_NE(definitionFull, nullptr); + EXPECT_EQ(definitionFull->getStatus().isAvailable(), false); + + manager.join(); +} + +class MediapipeConfigChanges : public TestWithTempDir { + void SetUp() override { + TestWithTempDir::SetUp(); + } + +public: + static const std::string mgdName; + static const std::string configFileWithGraphPathToReplace; + static const std::string configFileWithGraphPathToReplaceWithoutModel; + static const std::string configFileWithGraphPathToReplaceAndSubconfig; + static const std::string configFileWithoutGraph; + static const std::string pbtxtContent; + template + static void checkStatus(ModelManager& manager, ovms::StatusCode code) { + std::shared_ptr executor; + Request request; + Response response; + auto status = manager.createPipeline(executor, mgdName, &request, &response); + EXPECT_EQ(status, code) << status.string(); + } +}; +const std::string MediapipeConfigChanges::mgdName{"mediapipeGraph"}; +const std::string MediapipeConfigChanges::configFileWithGraphPathToReplace = R"( +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ], + "mediapipe_config_list": [ + { + "name":"mediapipeGraph", + "graph_path":"XYZ" + } + ] +} +)"; + +const std::string MediapipeConfigChanges::configFileWithGraphPathToReplaceAndSubconfig = R"( +{ + "model_config_list": 
[], + "mediapipe_config_list": [ + { + "name":"mediapipeGraph", + "graph_path":"XYZ", + "subconfig":"SUBCONFIG_PATH" + } + ] +} +)"; + +const std::string MediapipeConfigChanges::configFileWithGraphPathToReplaceWithoutModel = R"( +{ + "model_config_list": [], + "mediapipe_config_list": [ + { + "name":"mediapipeGraph", + "graph_path":"XYZ" + } + ] +} +)"; + +const std::string MediapipeConfigChanges::configFileWithoutGraph = R"( +{ + "model_config_list": [ + {"config": { + "name": "dummy", + "base_path": "/ovms/src/test/dummy" + } + } + ] +} +)"; + +const std::string MediapipeConfigChanges::pbtxtContent = R"( +input_stream: "in" +output_stream: "out" +node { + calculator: "ModelAPISessionCalculator" + output_side_packet: "SESSION:session" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: { + servable_name: "dummy" + servable_version: "1" + } + } +} +node { + calculator: "ModelAPISideFeedCalculator" + input_side_packet: "SESSION:session" + input_stream: "B:in" + output_stream: "A:out" + node_options: { + [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: { + tag_to_input_tensor_names { + key: "B" + value: "b" + } + tag_to_output_tensor_names { + key: "A" + value: "a" + } + } + } +} +)"; + +TEST_F(MediapipeConfigChanges, AddProperGraphThenRetireThenAddAgain) { + std::string configFileContent = configFileWithGraphPathToReplace; + std::string configFilePath = directoryPath + "/subconfig.json"; + std::string graphFilePath = directoryPath + "/graph.pbtxt"; + const std::string modelPathToReplace{"XYZ"}; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + createConfigFileWithContent(pbtxtContent, graphFilePath); + ConstructorEnabledModelManager modelManager; + modelManager.loadConfig(configFilePath); + const MediapipeFactory& factory = modelManager.getMediapipeFactory(); + auto definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); + // now we retire + configFileContent = configFileWithoutGraph; + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::RETIRED); + checkStatus(modelManager, StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_ANYMORE); + // now we add again + configFileContent = configFileWithGraphPathToReplace; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); +} + +TEST_F(MediapipeConfigChanges, AddImroperGraphThenFixWithReloadThenBreakAgain) { + std::string configFileContent = configFileWithGraphPathToReplace; + std::string configFilePath = directoryPath + "/subconfig.json"; + std::string graphFilePath = directoryPath + "/graph.pbtxt"; + createConfigFileWithContent(configFileContent, configFilePath); + 
createConfigFileWithContent(pbtxtContent, graphFilePath); + ConstructorEnabledModelManager modelManager; + modelManager.loadConfig(configFilePath); + const MediapipeFactory& factory = modelManager.getMediapipeFactory(); + auto definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); + ovms::Status status; + checkStatus(modelManager, StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET); + // TODO check for tfs as well - now not supported + // checkStatus(modelManager, StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET); + // now we fix the config + const std::string modelPathToReplace{"XYZ"}; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); + // now we break + configFileContent = configFileWithGraphPathToReplace; + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); + checkStatus(modelManager, StatusCode::MEDIAPIPE_DEFINITION_NOT_LOADED_YET); +} + +TEST_F(MediapipeConfigChanges, AddModelToConfigThenUnloadThenAddToSubconfig) { + std::string configFileContent = configFileWithGraphPathToReplace; + std::string configFilePath = directoryPath + "/config.json"; + std::string graphFilePath = directoryPath + "/graph.pbtxt"; + const std::string modelPathToReplace{"XYZ"}; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + createConfigFileWithContent(pbtxtContent, graphFilePath); + ConstructorEnabledModelManager modelManager; + modelManager.loadConfig(configFilePath); + const MediapipeFactory& factory = modelManager.getMediapipeFactory(); + auto model = modelManager.findModelByName("dummy"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + auto definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); + // now we retire the model + configFileContent = configFileWithGraphPathToReplaceWithoutModel; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + model = modelManager.findModelByName("dummy"); + ASSERT_EQ(nullptr, model->getDefaultModelInstance()); + definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); + // now we add model to subconfig + std::string subconfigFilePath = directoryPath + "/subconfig.json"; + SPDLOG_ERROR("{}", subconfigFilePath); 
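+    // Write the dummy model entry into the new subconfig file, then point the main
+    // config at both the graph and that subconfig before reloading.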
+ configFileContent = configFileWithoutGraph; + createConfigFileWithContent(configFileContent, subconfigFilePath); + configFileContent = configFileWithGraphPathToReplaceAndSubconfig; + configFileContent.replace(configFileContent.find(modelPathToReplace), modelPathToReplace.size(), graphFilePath); + const std::string subconfigPathToReplace{"SUBCONFIG_PATH"}; + configFileContent.replace(configFileContent.find(subconfigPathToReplace), subconfigPathToReplace.size(), subconfigFilePath); + createConfigFileWithContent(configFileContent, configFilePath); + modelManager.loadConfig(configFilePath); + model = modelManager.findModelByName("dummy"); + ASSERT_NE(nullptr, model->getDefaultModelInstance()); + ASSERT_EQ(model->getDefaultModelInstance()->getStatus().getState(), ModelVersionState::AVAILABLE); + definition = factory.findDefinitionByName(mgdName); + ASSERT_NE(nullptr, definition); + ASSERT_EQ(definition->getStatus().getStateCode(), PipelineDefinitionStateCode::AVAILABLE); + checkStatus(modelManager, StatusCode::OK); +} + +// TEST_F(MediapipeConfig, MediapipeFullRelativePathsSubconfigNegative) { +// ConstructorEnabledModelManager manager; +// auto status = manager.startFromFile("/ovms/src/test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json"); +// EXPECT_EQ(status, ovms::StatusCode::JSON_INVALID); +// manager.join(); +// } + +INSTANTIATE_TEST_SUITE_P( + Test, + MediapipeFlowAddTest, + ::testing::ValuesIn(mediaGraphsAdd), + [](const ::testing::TestParamInfo& info) { + return info.param; + }); +INSTANTIATE_TEST_SUITE_P( + Test, + MediapipeFlowDummyTest, + ::testing::ValuesIn(mediaGraphsDummy), + [](const ::testing::TestParamInfo& info) { + return info.param; + }); diff --git a/src/test/metrics_flow_test.cpp b/src/test/metrics_flow_test.cpp index c2292b952e..caf5b3b34a 100644 --- a/src/test/metrics_flow_test.cpp +++ b/src/test/metrics_flow_test.cpp @@ -41,7 +41,7 @@ using testing::HasSubstr; using testing::Not; // This checks for counter to be present with exact value and other remaining metrics of the family to be 0. 
-void checkRequestsCounter(const std::string& collectedMetricData, const std::string& metricName, const std::string& endpointName, std::optional endpointVersion, const std::string& interfaceName, const std::string& method, const std::string& api, int value) { +static void checkRequestsCounter(const std::string& collectedMetricData, const std::string& metricName, const std::string& endpointName, std::optional endpointVersion, const std::string& interfaceName, const std::string& method, const std::string& api, int value) { for (std::string _interface : std::set{"gRPC", "REST"}) { for (std::string _api : std::set{"TensorFlowServing", "KServe"}) { if (_api == "KServe") { @@ -150,7 +150,6 @@ TEST_F(MetricFlowTest, GrpcPredict) { preparePredictRequest(request, inputsMeta); ASSERT_EQ(impl.Predict(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); } - // Failed single model calls for (int i = 0; i < numberOfFailedRequests; i++) { request.Clear(); @@ -166,7 +165,7 @@ TEST_F(MetricFlowTest, GrpcPredict) { request.Clear(); response.Clear(); request.mutable_model_spec()->mutable_name()->assign(dagName); - inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, correctPrecision}}}; + inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {ovms::signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, correctPrecision}}}; preparePredictRequest(request, inputsMeta); ASSERT_EQ(impl.Predict(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); } @@ -176,7 +175,7 @@ TEST_F(MetricFlowTest, GrpcPredict) { request.Clear(); response.Clear(); request.mutable_model_spec()->mutable_name()->assign(dagName); - inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, wrongPrecision}}}; + inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {ovms::signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, wrongPrecision}}}; preparePredictRequest(request, inputsMeta); ASSERT_EQ(impl.Predict(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); } @@ -280,7 +279,7 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { for (int i = 0; i < numberOfSuccessRequests; i++) { request.Clear(); response.Clear(); - inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, correctPrecision}}}; + inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {ovms::signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, correctPrecision}}}; preparePredictRequest(request, inputsMeta); request.mutable_model_name()->assign(dagName); ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::OK); @@ -289,7 +288,7 @@ TEST_F(MetricFlowTest, GrpcModelInfer) { for (int i = 0; i < numberOfFailedRequests; i++) { request.Clear(); response.Clear(); - inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, wrongPrecision}}}; + inputs_info_t inputsMeta{{DUMMY_MODEL_INPUT_NAME, {ovms::signed_shape_t{dynamicBatch, 1, DUMMY_MODEL_INPUT_SIZE}, wrongPrecision}}}; preparePredictRequest(request, inputsMeta); request.mutable_model_name()->assign(dagName); ASSERT_EQ(impl.ModelInfer(nullptr, &request, &response).error_code(), grpc::StatusCode::INVALID_ARGUMENT); diff --git a/src/test/model_service_test.cpp b/src/test/model_service_test.cpp index ad65699f73..04dcf20aee 100644 --- a/src/test/model_service_test.cpp +++ b/src/test/model_service_test.cpp @@ -20,6 +20,7 @@ #pragma GCC diagnostic push #pragma 
GCC diagnostic ignored "-Wall" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "tensorflow_serving/apis/get_model_status.pb.h" #include "tensorflow_serving/apis/model_service.grpc.pb.h" #include "tensorflow_serving/apis/model_service.pb.h" @@ -64,12 +65,12 @@ using MyTypes = ::testing::Types< TYPED_TEST_SUITE(ModelServiceTest, MyTypes); -void executeModelStatus(const TFSGetModelStatusRequest& modelStatusRequest, TFSGetModelStatusResponse& modelStatusResponse, ModelManager& manager, ExecutionContext context, ovms::StatusCode statusCode = StatusCode::OK) { +static void executeModelStatus(const TFSGetModelStatusRequest& modelStatusRequest, TFSGetModelStatusResponse& modelStatusResponse, ModelManager& manager, ExecutionContext context, ovms::StatusCode statusCode = StatusCode::OK) { modelStatusResponse.Clear(); ASSERT_EQ(GetModelStatusImpl::getModelStatus(&modelStatusRequest, &modelStatusResponse, manager, context), statusCode); } -void setModelStatusRequest(TFSGetModelStatusRequest& modelStatusRequest, const std::string& name, int version) { +static void setModelStatusRequest(TFSGetModelStatusRequest& modelStatusRequest, const std::string& name, int version) { modelStatusRequest.Clear(); auto model_spec = modelStatusRequest.mutable_model_spec(); model_spec->Clear(); @@ -79,7 +80,7 @@ void setModelStatusRequest(TFSGetModelStatusRequest& modelStatusRequest, const s } } -void verifyModelStatusResponse(const TFSGetModelStatusResponse& modelStatusResponse, const std::vector& versions = {1}) { +static void verifyModelStatusResponse(const TFSGetModelStatusResponse& modelStatusResponse, const std::vector& versions = {1}) { ASSERT_EQ(modelStatusResponse.model_version_status_size(), versions.size()); for (size_t i = 0; i < versions.size(); i++) { auto& model_version_status = modelStatusResponse.model_version_status()[i]; @@ -91,16 +92,16 @@ void verifyModelStatusResponse(const TFSGetModelStatusResponse& modelStatusRespo } } -void verifyModelStatusResponse(const KFSGetModelStatusResponse& modelStatusResponse, const std::vector& versions = {1}) { +static void verifyModelStatusResponse(const KFSGetModelStatusResponse& modelStatusResponse, const std::vector& versions = {1}) { ASSERT_TRUE(modelStatusResponse.ready()); } -void executeModelStatus(const KFSGetModelStatusRequest& modelStatusRequest, KFSGetModelStatusResponse& modelStatusResponse, ModelManager& manager, ExecutionContext context, ovms::StatusCode statusCode = StatusCode::OK) { +static void executeModelStatus(const KFSGetModelStatusRequest& modelStatusRequest, KFSGetModelStatusResponse& modelStatusResponse, ModelManager& manager, ExecutionContext context, ovms::StatusCode statusCode = StatusCode::OK) { modelStatusResponse.Clear(); ASSERT_EQ(KFSInferenceServiceImpl::getModelReady(&modelStatusRequest, &modelStatusResponse, manager, context), statusCode); } -void setModelStatusRequest(KFSGetModelStatusRequest& modelStatusRequest, const std::string& name, int version) { +static void setModelStatusRequest(KFSGetModelStatusRequest& modelStatusRequest, const std::string& name, int version) { modelStatusRequest.Clear(); modelStatusRequest.set_name(name); if (version) @@ -186,6 +187,33 @@ TYPED_TEST(ModelServiceTest, pipeline) { verifyModelStatusResponse(this->modelStatusResponse); } +#if (MEDIAPIPE_DISABLE == 0) +TYPED_TEST(ModelServiceTest, MediapipeGraph) { + std::string fileToReload = "/ovms/src/test/mediapipe/config_mediapipe_dummy_adapter_full.json"; + ASSERT_EQ(this->manager.startFromFile(fileToReload), StatusCode::OK); + + 
const std::string name = "mediaDummyADAPTFULL"; + + // existing version + int version = 1; + setModelStatusRequest(this->modelStatusRequest, name, version); + executeModelStatus(this->modelStatusRequest, this->modelStatusResponse, this->manager, DEFAULT_TEST_CONTEXT); + verifyModelStatusResponse(this->modelStatusResponse); + + // No version specified - with 0 version value is not set in helper function + version = 0; + setModelStatusRequest(this->modelStatusRequest, name, version); + executeModelStatus(this->modelStatusRequest, this->modelStatusResponse, this->manager, DEFAULT_TEST_CONTEXT); + verifyModelStatusResponse(this->modelStatusResponse); + + // Any version + version = 5; + setModelStatusRequest(this->modelStatusRequest, name, version); + executeModelStatus(this->modelStatusRequest, this->modelStatusResponse, this->manager, DEFAULT_TEST_CONTEXT); + verifyModelStatusResponse(this->modelStatusResponse); +} +#endif + TYPED_TEST(ModelServiceTest, non_existing_model) { const std::string name = "non_existing_model"; int version = 0; diff --git a/src/test/modelconfig_test.cpp b/src/test/modelconfig_test.cpp index b784b90a65..969e1313d0 100644 --- a/src/test/modelconfig_test.cpp +++ b/src/test/modelconfig_test.cpp @@ -40,6 +40,11 @@ TEST(ModelConfig, getters_setters) { auto path = config.getBasePath(); EXPECT_EQ(path, "/path"); + config.setRootDirectoryPath("/pathto/"); + config.setBasePath("relative/path"); + path = config.getBasePath(); + EXPECT_EQ(path, "/pathto/relative/path"); + config.setTargetDevice("GPU"); auto device = config.getTargetDevice(); EXPECT_EQ(device, "GPU"); diff --git a/src/test/modelinstance_test.cpp b/src/test/modelinstance_test.cpp index 84f77ea99f..09781389ce 100644 --- a/src/test/modelinstance_test.cpp +++ b/src/test/modelinstance_test.cpp @@ -72,6 +72,14 @@ TEST_F(TestUnloadModel, SuccessfulUnload) { EXPECT_EQ(ovms::ModelVersionState::END, modelInstance.getStatus().getState()); } +TEST_F(TestUnloadModel, SuccessfulUnloadSaved_Model) { + ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); + ASSERT_EQ(modelInstance.loadModel(DUMMY_SAVED_MODEL_CONFIG), ovms::StatusCode::OK); + ASSERT_EQ(ovms::ModelVersionState::AVAILABLE, modelInstance.getStatus().getState()); + modelInstance.retireModel(); + EXPECT_EQ(ovms::ModelVersionState::END, modelInstance.getStatus().getState()); +} + TEST_F(TestUnloadModel, CantUnloadModelWhilePredictPathAcquiredAndLockedInstance) { ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); ovms::Status status = modelInstance.loadModel(DUMMY_MODEL_CONFIG); @@ -159,10 +167,11 @@ TEST_F(TestUnloadModel, CheckIfStateIsUnloadingDuringUnloading) { EXPECT_EQ(ovms::ModelVersionState::END, mockModelInstance.getStatus().getState()); } -class TestLoadModel : public ::testing::Test { +class TestLoadModel : public TestWithTempDir { protected: std::unique_ptr ieCore; void SetUp() { + TestWithTempDir::SetUp(); ieCore = std::make_unique(); } }; @@ -291,7 +300,7 @@ TEST_F(TestLoadModel, CheckIfOVNonExistingBinFileErrorIsCatched) { TEST_F(TestLoadModel, CheckIfNonExistingXmlFileReturnsFileInvalid) { ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); - const std::string modelPath = "/tmp/test_load_model"; + const std::string modelPath = directoryPath + "/test_load_model"; std::filesystem::create_directories(modelPath); ovms::model_version_t version = 1; const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); @@ -327,7 +336,7 @@ TEST_F(TestLoadModel, 
CheckIfNonExistingXmlFileReturnsFileInvalid) { TEST_F(TestLoadModel, CheckIfNonExistingBinFileReturnsFileInvalid) { ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); - const std::string modelPath = "/tmp/test_load_model"; + const std::string modelPath = directoryPath + "/test_load_model"; std::filesystem::create_directories(modelPath); ovms::model_version_t version = 1; const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); @@ -360,6 +369,154 @@ TEST_F(TestLoadModel, CheckIfNonExistingBinFileReturnsFileInvalid) { EXPECT_EQ(status, ovms::StatusCode::FILE_INVALID) << status.string(); } +TEST_F(TestLoadModel, CheckMultipleFormatsHandling) { + ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); + + const std::string modelPath = directoryPath + "/test_multiple_models"; + std::filesystem::create_directories(modelPath); + ovms::model_version_t version = 1; + const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); + if (!std::filesystem::exists(versionDirectoryPath)) { + ASSERT_TRUE(std::filesystem::create_directories(versionDirectoryPath)); + } + { + std::ofstream xmlFile{versionDirectoryPath + "/model.xml"}; + xmlFile << "NOT_NEEDED_CONTENT" << std::endl; + } + { + std::ofstream binFile{versionDirectoryPath + "/model.bin"}; + binFile << "NOT_NEEDED_CONTENT" << std::endl; + } + { + std::ofstream onnxFile{versionDirectoryPath + "/model.onnx"}; + onnxFile << "NOT_NEEDED_CONTENT" << std::endl; + } + { + std::ofstream savedModelFile{versionDirectoryPath + "/saved_model.pb"}; + savedModelFile << "NOT_NEEDED_CONTENT" << std::endl; + } + + const ovms::ModelConfig config{ + "NOT_USED_NAME", + modelPath, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + false, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number, + "", // cache dir + version, // version + modelPath, // local path + }; + auto status = modelInstance.loadModel(config); + auto model_files = modelInstance.getModelFiles(); + EXPECT_EQ(model_files.front(), directoryPath + "/test_multiple_models/1/model.xml"); +} + +TEST_F(TestLoadModel, CheckSavedModelHandling) { + ovms::ModelInstance modelInstance("saved-model", UNUSED_MODEL_VERSION, *ieCore); + + const std::string modelPath = directoryPath + "/test_saved_model"; + std::filesystem::create_directories(modelPath); + ovms::model_version_t version = 1; + const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); + if (!std::filesystem::exists(versionDirectoryPath)) { + ASSERT_TRUE(std::filesystem::create_directories(versionDirectoryPath)); + } + { + std::ofstream savedModelFile{versionDirectoryPath + "/saved_model.pb"}; + savedModelFile << "NOT_NEEDED_CONTENT" << std::endl; + } + const ovms::ModelConfig config{ + "saved-model", + modelPath, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + false, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number, + "", // cache dir + version, // version + modelPath, // local path + }; + auto status = modelInstance.loadModel(config); + auto model_files = modelInstance.getModelFiles(); + + EXPECT_EQ(model_files.front(), directoryPath + "/test_saved_model/1/"); +} + +TEST_F(TestLoadModel, CheckTFModelHandling) { + ovms::ModelInstance modelInstance("tf", UNUSED_MODEL_VERSION, *ieCore); + + const 
std::string modelPath = directoryPath + "/test_tf"; + std::filesystem::create_directories(modelPath); + ovms::model_version_t version = 1; + const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); + if (!std::filesystem::exists(versionDirectoryPath)) { + ASSERT_TRUE(std::filesystem::create_directories(versionDirectoryPath)); + } + { + std::ofstream savedModelFile{versionDirectoryPath + "/model.pb"}; + savedModelFile << "NOT_NEEDED_CONTENT" << std::endl; + } + const ovms::ModelConfig config{ + "saved-model", + modelPath, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + false, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number, + "", // cache dir + version, // version + modelPath, // local path + }; + auto status = modelInstance.loadModel(config); + auto model_files = modelInstance.getModelFiles(); + ASSERT_NE(model_files.size(), 0); + EXPECT_EQ(model_files.front(), directoryPath + "/test_tf/1/model.pb"); +} + +TEST_F(TestLoadModel, CheckONNXModelHandling) { + ovms::ModelInstance modelInstance("tf", UNUSED_MODEL_VERSION, *ieCore); + + const std::string modelPath = directoryPath + "/test_onnx"; + std::filesystem::create_directories(modelPath); + ovms::model_version_t version = 1; + const std::string versionDirectoryPath = modelPath + "/" + std::to_string(version); + if (!std::filesystem::exists(versionDirectoryPath)) { + ASSERT_TRUE(std::filesystem::create_directories(versionDirectoryPath)); + } + { + std::ofstream savedModelFile{versionDirectoryPath + "/my-model.onnx"}; + savedModelFile << "NOT_NEEDED_CONTENT" << std::endl; + } + const ovms::ModelConfig config{ + "saved-model", + modelPath, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + false, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number, + "", // cache dir + version, // version + modelPath, // local path + }; + auto status = modelInstance.loadModel(config); + auto model_files = modelInstance.getModelFiles(); + + EXPECT_EQ(model_files.front(), directoryPath + "/test_onnx/1/my-model.onnx"); +} + TEST_F(TestLoadModel, SuccessfulLoad) { ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); EXPECT_EQ(modelInstance.loadModel(DUMMY_MODEL_CONFIG), ovms::StatusCode::OK); @@ -464,6 +621,26 @@ TEST_F(TestLoadModelWithMapping, SuccessfulLoad) { EXPECT_EQ(modelInstance.loadModel(config), ovms::StatusCode::OK); EXPECT_EQ(ovms::ModelVersionState::AVAILABLE, modelInstance.getStatus().getState()); + EXPECT_EQ(modelInstance.getInputsInfo().begin()->second->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING); + EXPECT_EQ(modelInstance.getInputsInfo().begin()->second->getShape(), ovms::Shape({1, 10})); + EXPECT_EQ(modelInstance.getOutputsInfo().begin()->second->getShape(), ovms::Shape({1, 10})); +} + +TEST_F(TestLoadModelWithMapping, SuccessfulLoadBytesEncoded) { + ovms::ModelInstance modelInstance("UNUSED_NAME", UNUSED_MODEL_VERSION, *ieCore); + + ovms::ShapeInfo inputShape{ovms::FIXED, {1, 20}}; + shapeMap["input"] = inputShape; + config.setShapes(shapeMap); + + layouts["input"] = ovms::LayoutConfiguration{"NC"}; + layouts["output"] = ovms::LayoutConfiguration{"NC"}; + config.setLayouts(layouts); + config.parseShapeParameter("(1,10,10)"); + + EXPECT_EQ(modelInstance.loadModel(config), ovms::StatusCode::OK); + 
EXPECT_EQ(ovms::ModelVersionState::AVAILABLE, modelInstance.getStatus().getState());
+    EXPECT_EQ(modelInstance.getInputsInfo().begin()->second->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);
     EXPECT_EQ(modelInstance.getInputsInfo().begin()->second->getShape(), ovms::Shape({1, 10}));
     EXPECT_EQ(modelInstance.getOutputsInfo().begin()->second->getShape(), ovms::Shape({1, 10}));
 }
@@ -861,3 +1038,45 @@ TEST(CpuThroughputNotSpecified, AffinityWithNumStreams) {
     EXPECT_EQ(pluginConfig.count("AFFINITY"), 1);
     EXPECT_EQ(pluginConfig.count("NUM_STREAMS"), 1);
 }
+
+TEST(TensorMap, TestProcessingHintFromShape) {
+    auto servableInputs = ovms::tensor_map_t({
+        {"Input_FP32_1_224_224_3_NHWC",
+            std::make_shared<ovms::TensorInfo>("Input_FP32_1_224_224_3_NHWC", ovms::Precision::FP32, ovms::shape_t{1, 224, 224, 3})},
+        {"Input_U8_1_3_NCHW",
+            std::make_shared<ovms::TensorInfo>("Input_U8_1_3_NCHW", ovms::Precision::U8, ovms::shape_t{1, 3})},
+        {"Input_U8_3_N",
+            std::make_shared<ovms::TensorInfo>("Input_U8_3_N", ovms::Precision::U8, ovms::shape_t{3})},
+        {"Input_U8_-1_N",
+            std::make_shared<ovms::TensorInfo>("Input_U8_-1_N", ovms::Precision::U8, ovms::Shape{ovms::Dimension::any()})},
+    });
+    auto servableOutputs = ovms::tensor_map_t({{"Output_U8_-1_-1_N?",
+                                                   std::make_shared<ovms::TensorInfo>("Output_U8_-1_-1_N?", ovms::Precision::U8, ovms::Shape{ovms::Dimension::any(), ovms::Dimension::any()})},
+        {"Output_U8_-1_-1_N?_string",
+            std::make_shared<ovms::TensorInfo>("Output_U8_-1_-1_N?_string", ovms::Precision::U8, ovms::Shape{ovms::Dimension::any(), ovms::Dimension::any()})},
+        {"Output_FP32_-1_-1_N?_string",
+            std::make_shared<ovms::TensorInfo>("Output_FP32_-1_-1_N?_string", ovms::Precision::FP32, ovms::Shape{ovms::Dimension::any(), ovms::Dimension::any()})}});
+
+    EXPECT_EQ(servableInputs["Input_FP32_1_224_224_3_NHWC"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::IMAGE);
+    EXPECT_EQ(servableInputs["Input_U8_1_3_NCHW"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::STRING_2D_U8);
+    EXPECT_EQ(servableInputs["Input_U8_3_N"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);  // due to static dimension
+    EXPECT_EQ(servableInputs["Input_U8_-1_N"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::STRING_1D_U8);
+    EXPECT_EQ(servableOutputs["Output_U8_-1_-1_N?"]->getPostProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);  // due to no suffix
+    EXPECT_EQ(servableOutputs["Output_U8_-1_-1_N?_string"]->getPostProcessingHint(), ovms::TensorInfo::ProcessingHint::STRING_2D_U8);  // due to suffix
+    EXPECT_EQ(servableOutputs["Output_FP32_-1_-1_N?_string"]->getPostProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);  // no processing due to not being U8
+}
+
+TEST(TensorMap, TestProcessingHintFromShape_Demultiplexer) {
+    auto servableInputs = ovms::tensor_map_t({
+        {"Input_FP32_1_1_224_224_3_NHWC",
+            std::make_shared<ovms::TensorInfo>("Input_FP32_1_1_224_224_3_NHWC", ovms::Precision::FP32, ovms::shape_t{1, 224, 224, 3})->createCopyWithDemultiplexerDimensionPrefix(1)},
+        {"Input_U8_1_1_3_NCHW",
+            std::make_shared<ovms::TensorInfo>("Input_U8_1_1_3_NCHW", ovms::Precision::U8, ovms::shape_t{1, 3})->createCopyWithDemultiplexerDimensionPrefix(1)},
+        {"Input_U8_1_3_N",
+            std::make_shared<ovms::TensorInfo>("Input_U8_1_3_N", ovms::Precision::U8, ovms::shape_t{3})->createCopyWithDemultiplexerDimensionPrefix(1)},
+    });
+
+    EXPECT_EQ(servableInputs["Input_FP32_1_1_224_224_3_NHWC"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::IMAGE);
+    EXPECT_EQ(servableInputs["Input_U8_1_1_3_NCHW"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);  // due to demultiplexer
+    EXPECT_EQ(servableInputs["Input_U8_1_3_N"]->getPreProcessingHint(), ovms::TensorInfo::ProcessingHint::NO_PROCESSING);  // due to demultiplexer
+}
diff --git a/src/test/modelmanager_test.cpp b/src/test/modelmanager_test.cpp
index fc4cbf6026..eedadfe3cb 100644
--- a/src/test/modelmanager_test.cpp
+++ b/src/test/modelmanager_test.cpp
@@ -88,6 +88,38 @@ const char* config_2_models_new = R"({
     }]
 })";
 
+const char* relative_config_1_model = R"({
+    "model_config_list": [
+    {
+        "config": {
+            "name": "resnet",
+            "base_path": "models/dummy1",
+            "target_device": "CPU",
+            "model_version_policy": {"all": {}}
+        }
+    }]
+})";
+
+const char* relative_config_2_models = R"({
+    "model_config_list": [
+    {
+        "config": {
+            "name": "resnet",
+            "base_path": "models/dummy1",
+            "target_device": "CPU",
+            "model_version_policy": {"all": {}}
+        }
+    },
+    {
+        "config": {
+            "name": "alpha",
+            "base_path": "models/dummy2",
+            "target_device": "CPU",
+            "model_version_policy": {"all": {}}
+        }
+    }]
+})";
+
 const std::string FIRST_MODEL_NAME = "resnet";
 const std::string SECOND_MODEL_NAME = "alpha";
 
@@ -123,6 +155,13 @@ TEST_F(ModelManager, ConfigParseNoModels) {
     EXPECT_EQ(status, ovms::StatusCode::OK);
 }
 
+#if (MEDIAPIPE_DISABLE == 1)
+TEST_F(ModelManager, ConfigParseDisableMediapipe) {
+    auto status = fixtureManager.startFromFile("/ovms/src/test/mediapipe/config_mediapipe_add_adapter_full.json");
+    EXPECT_EQ(status, ovms::StatusCode::JSON_INVALID);
+}
+#endif
+
 TEST_F(ModelManager, WrongConfigFile) {
     std::string configFile = "123/tmp/not_a_valid_file_name";
     auto status = fixtureManager.startFromFile(configFile);
@@ -517,19 +556,39 @@ TEST_F(ModelManager, parseConfigWhenPipelineDefinitionMatchSchema) {
     modelMock.reset();
 }
 
-void setupModelsDirs() {
+static void setupModelsDirs() {
     std::filesystem::create_directory("/tmp/models");
     std::filesystem::create_directory("/tmp/models/dummy1");
     std::filesystem::create_directory("/tmp/models/dummy2");
 }
 
-TEST(ModelManagerWatcher, configRelodNotNeededManyThreads) {
+const std::vector<const char*> WATCHER_TEST_CONFIGS{
+    config_2_models,
+    relative_config_2_models,
+};
+
+class ModelManagerWatcher2Models : public ::testing::TestWithParam<const char*> {};
+
+std::string fromContentsToString(const std::string& configContents) {
+    return configContents.find("tmp") == std::string::npos ?
"FullPath" : "RelativePath"; +} + +INSTANTIATE_TEST_SUITE_P( + Test, + ModelManagerWatcher2Models, + ::testing::ValuesIn(WATCHER_TEST_CONFIGS), + [](const ::testing::TestParamInfo& info) { + return fromContentsToString(info.param); + }); + +TEST_P(ModelManagerWatcher2Models, configRelodNotNeededManyThreads) { std::string configFile = "/tmp/config.json"; modelMock = std::make_shared(); MockModelManager manager; setupModelsDirs(); - createConfigFileWithContent(config_2_models, configFile); + std::string config_contents = GetParam(); + createConfigFileWithContent(config_contents, configFile); auto status = manager.startFromFile(configFile); EXPECT_EQ(status, ovms::StatusCode::OK); std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -553,13 +612,14 @@ TEST(ModelManagerWatcher, configRelodNotNeededManyThreads) { modelMock.reset(); } -TEST(ModelManagerWatcher, configRelodNeededManyThreads) { +TEST_P(ModelManagerWatcher2Models, configRelodNeededManyThreads) { std::string configFile = "/tmp/config.json"; modelMock = std::make_shared(); MockModelManager manager; setupModelsDirs(); - createConfigFileWithContent(config_2_models, configFile); + std::string config_contents = GetParam(); + createConfigFileWithContent(config_contents, configFile); auto status = manager.startFromFile(configFile); EXPECT_EQ(status, ovms::StatusCode::OK); std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -589,13 +649,14 @@ TEST(ModelManagerWatcher, configRelodNeededManyThreads) { modelMock.reset(); } -TEST(ModelManagerWatcher, configReloadNeededChange) { +TEST_P(ModelManagerWatcher2Models, configReloadNeededChange) { std::string configFile = "/tmp/config.json"; modelMock = std::make_shared(); MockModelManager manager; - createConfigFileWithContent(config_2_models, configFile); + std::string config_contents = GetParam(); + createConfigFileWithContent(config_contents, configFile); auto status = manager.startFromFile(configFile); EXPECT_EQ(status, ovms::StatusCode::OK); std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -612,13 +673,14 @@ TEST(ModelManagerWatcher, configReloadNeededChange) { modelMock.reset(); } -TEST(ModelManagerWatcher, loadConfigManyThreads) { +TEST_P(ModelManagerWatcher2Models, loadConfigManyThreads) { std::string configFile = "/tmp/config.json"; modelMock = std::make_shared(); MockModelManager manager; setupModelsDirs(); - createConfigFileWithContent(config_2_models, configFile); + std::string config_contents = GetParam(); + createConfigFileWithContent(config_contents, configFile); auto status = manager.startFromFile(configFile); EXPECT_EQ(status, ovms::StatusCode::OK); @@ -641,13 +703,14 @@ TEST(ModelManagerWatcher, loadConfigManyThreads) { modelMock.reset(); } -TEST(ModelManagerWatcher, configReloadNeededBeforeConfigLoad) { +TEST_P(ModelManagerWatcher2Models, configReloadNeededBeforeConfigLoad) { std::string configFile = "/tmp/config.json"; modelMock = std::make_shared(); MockModelManager manager; - createConfigFileWithContent(config_2_models, configFile); + std::string config_contents = GetParam(); + createConfigFileWithContent(config_contents, configFile); auto status = manager.startFromFile(configFile); EXPECT_EQ(status, ovms::StatusCode::OK); std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -704,22 +767,78 @@ TEST(ModelManagerWatcher, parseConfigWhenOnlyPipelineDefinitionProvided) { TEST_F(ModelManager, ReadsVersionsFromDisk) { const std::string path = "/tmp/test_model/"; - for (auto i : {1, 5, 8, 10}) { - std::filesystem::create_directories(path + std::to_string(i)); + try 
{ + for (auto i : {1, 5, 8, 10}) { + std::filesystem::create_directories(path + std::to_string(i)); + } + + std::filesystem::create_directories(path + "unknown_dir11"); // invalid version directory + ovms::model_versions_t versions; + std::shared_ptr fs = std::make_shared(); + + auto status = fixtureManager.readAvailableVersions(fs, path, versions); + + EXPECT_EQ(status, ovms::StatusCode::OK); + EXPECT_THAT(versions, ::testing::UnorderedElementsAre(1, 5, 8, 10)); + + for (auto i : {1, 5, 8, 10}) { + std::filesystem::remove(path + std::to_string(i)); + } + + std::filesystem::remove(path + "unknown_dir11"); // invalid version directory + } catch (...) { + for (auto i : {1, 5, 8, 10}) { + std::filesystem::remove(path + std::to_string(i)); + } + + std::filesystem::remove(path + "unknown_dir11"); // invalid version directory + } +} + +TEST_F(ModelManager, ReadsVersionsFromDiskRelativePath) { + const std::string path = "test_model/"; + + try { + for (auto i : {1, 5, 8, 10}) { + std::filesystem::create_directories(path + std::to_string(i)); + } + + std::filesystem::create_directories(path + "unknown_dir11"); // invalid version directory + ovms::model_versions_t versions; + std::shared_ptr fs = std::make_shared(); + + auto status = fixtureManager.readAvailableVersions(fs, path, versions); + + EXPECT_EQ(status, ovms::StatusCode::OK); + EXPECT_THAT(versions, ::testing::UnorderedElementsAre(1, 5, 8, 10)); + + for (auto i : {1, 5, 8, 10}) { + std::filesystem::remove(path + std::to_string(i)); + } + + std::filesystem::remove(path + "unknown_dir11"); // invalid version directory + } catch (...) { + for (auto i : {1, 5, 8, 10}) { + std::filesystem::remove(path + std::to_string(i)); + } + + std::filesystem::remove(path + "unknown_dir11"); // invalid version directory } +} + +TEST_F(ModelManager, PathEscapeError1) { + const std::string path = "/tmp/../test_model/"; - std::filesystem::create_directories(path + "unknown_dir11"); // invalid version directory ovms::model_versions_t versions; std::shared_ptr fs = std::make_shared(); auto status = fixtureManager.readAvailableVersions(fs, path, versions); - EXPECT_EQ(status, ovms::StatusCode::OK); - EXPECT_THAT(versions, ::testing::UnorderedElementsAre(1, 5, 8, 10)); + EXPECT_EQ(status, ovms::StatusCode::PATH_INVALID); } -TEST_F(ModelManager, PathEscapeError1) { - const std::string path = "/tmp/../test_model/"; +TEST_F(ModelManager, PathEscapeError1RelativePath) { + const std::string path = "tmp/../test_model/"; ovms::model_versions_t versions; std::shared_ptr fs = std::make_shared(); @@ -754,9 +873,21 @@ TEST_F(ModelManager, ReadVersionsInvalidPath) { EXPECT_EQ(status, ovms::StatusCode::PATH_INVALID); } +TEST_F(ModelManager, ReadVersionsInvalidPathRelativePath) { + const std::string path = "inexisting_path/8bt4kv"; + + try { + std::filesystem::remove(path); + } catch (const std::filesystem::filesystem_error&) { + } + + std::vector versions; + std::shared_ptr fs = std::make_shared(); + auto status = fixtureManager.readAvailableVersions(fs, path, versions); + EXPECT_EQ(status, ovms::StatusCode::PATH_INVALID); +} + TEST(ModelManagerWatcher, StartFromFile) { - std::filesystem::create_directories(model_1_path); - std::filesystem::create_directories(model_2_path); std::string fileToReload = "/tmp/ovms_config_file1.json"; createConfigFileWithContent(config_1_model, fileToReload); modelMock = std::make_shared(); @@ -771,6 +902,21 @@ TEST(ModelManagerWatcher, StartFromFile) { modelMock.reset(); } +TEST(ModelManagerWatcher, StartFromFileRelativePath) { + std::string 
fileToReload = "/tmp/ovms_config_file1.json"; + createConfigFileWithContent(relative_config_1_model, fileToReload); + modelMock = std::make_shared(); + MockModelManager manager; + + EXPECT_CALL(*modelMock, addVersion(_, _, _, _)) + .Times(1) + .WillRepeatedly(Return(ovms::Status(ovms::StatusCode::OK))); + auto status = manager.startFromFile(fileToReload); + EXPECT_EQ(status, ovms::StatusCode::OK); + manager.join(); + modelMock.reset(); +} + TEST(ModelManagerWatcher, StartFromFileWhenModelFilesMissing) { std::filesystem::create_directories(model_1_path); std::string fileToReload = "/tmp/ovms_config_file1.json"; @@ -782,9 +928,18 @@ TEST(ModelManagerWatcher, StartFromFileWhenModelFilesMissing) { manager.join(); } -TEST(ModelManagerWatcher, ConfigReloadingShouldAddNewModel) { +TEST(ModelManagerWatcher, StartFromFileWhenModelFilesMissingRelativePath) { std::filesystem::create_directories(model_1_path); - std::filesystem::create_directories(model_2_path); + std::string fileToReload = "/tmp/ovms_config_file1.json"; + createConfigFileWithContent(relative_config_1_model, fileToReload); + ConstructorEnabledModelManager manager; + ASSERT_TRUE(std::filesystem::is_empty(model_1_path)); + auto status = manager.startFromFile(fileToReload); + EXPECT_EQ(status, ovms::StatusCode::OK); + manager.join(); +} + +TEST(ModelManagerWatcher, ConfigReloadingShouldAddNewModel) { std::string fileToReload = "/tmp/ovms_config_file2.json"; createConfigFileWithContent(config_1_model, fileToReload); modelMock = std::make_shared(); @@ -815,6 +970,37 @@ TEST(ModelManagerWatcher, ConfigReloadingShouldAddNewModel) { modelMock.reset(); } +TEST(ModelManagerWatcher, ConfigReloadingShouldAddNewModelRelativePath) { + std::string fileToReload = "/tmp/ovms_config_file2.json"; + createConfigFileWithContent(relative_config_1_model, fileToReload); + modelMock = std::make_shared(); + MockModelManager manager; + EXPECT_CALL(*modelMock, addVersion(_, _, _, _)) + .WillRepeatedly(Return(ovms::Status(ovms::StatusCode::OK))); + + auto status = manager.startFromFile(fileToReload); + manager.startWatcher(true); + auto models = manager.getModels().size(); + EXPECT_EQ(models, 1); + EXPECT_EQ(status, ovms::StatusCode::OK); + std::thread t([&manager]() { + waitForOVMSConfigReload(manager); + }); + t.join(); + createConfigFileWithContent(relative_config_2_models, fileToReload); + bool isNeeded = false; + manager.configFileReloadNeeded(isNeeded); + ASSERT_EQ(isNeeded, true); + std::thread s([&manager]() { + waitForOVMSConfigReload(manager); + }); + s.join(); + models = manager.getModels().size(); + EXPECT_EQ(models, 2); + manager.join(); + modelMock.reset(); +} + struct CNLIMWrapperMock : public ovms::CNLIMWrapper { inline static int deinitializeSum = 0; @@ -1202,6 +1388,39 @@ TEST_F(ModelManager, ConfigReloadingWithTwoModelsWithTheSameName) { modelMock.reset(); } +TEST_F(ModelManager, ConfigReloadingWithTwoModelsWithTheSameNameRelativePath) { + const char* configWithTwoSameNames = R"({ + "model_config_list": [ + { + "config": { + "name": "same_name", + "base_path": "models/dummy1" + } + }, + { + "config": { + "name": "same_name", + "base_path": "models/dummy2" + } + }]})"; + std::filesystem::create_directories(model_1_path); + std::filesystem::create_directories(model_2_path); + std::string fileToReload = "/tmp/ovms_config_file2.json"; + createConfigFileWithContent(configWithTwoSameNames, fileToReload); + modelMock = std::make_shared(); + MockModelManager manager; + + EXPECT_CALL(*modelMock, addVersion(_, _, _, _)) + .Times(1) + 
.WillRepeatedly(Return(ovms::Status(ovms::StatusCode::OK))); + auto status = manager.startFromFile(fileToReload); + auto models = manager.getModels().size(); + EXPECT_EQ(models, 1); + EXPECT_EQ(status, ovms::StatusCode::OK); + manager.join(); + modelMock.reset(); +} + class MockModelManagerWithModelInstancesJustChangingStates : public ovms::ModelManager { public: std::shared_ptr modelFactory(const std::string& name, const bool isStateful) override { @@ -1259,6 +1478,42 @@ TEST_F(ModelManager, ConfigReloadingShouldRetireModelInstancesOfModelRemovedFrom manager.join(); } +TEST_F(ModelManager, ConfigReloadingShouldRetireModelInstancesOfModelRemovedFromJsonRelative) { + std::filesystem::create_directories(model_1_path); + std::filesystem::create_directories(model_2_path); + std::string fileToReload = "/tmp/ovms_config_file2.json"; + createConfigFileWithContent(relative_config_2_models, fileToReload); + modelMock = std::make_shared(); + MockModelManagerWithModelInstancesJustChangingStates manager; + manager.registerVersionToLoad(1); + manager.registerVersionToLoad(2); + auto status = manager.startFromFile(fileToReload); + manager.startWatcher(true); + auto models = manager.getModels(); + ASSERT_EQ(models.size(), 2); + ASSERT_EQ(status, ovms::StatusCode::OK); + waitForOVMSConfigReload(manager); + models = manager.getModels(); + ASSERT_EQ(models.size(), 2); + for (auto& nameModel : models) { + for (auto& versionModelInstance : nameModel.second->getModelVersions()) { + ASSERT_EQ(ovms::ModelVersionState::AVAILABLE, versionModelInstance.second->getStatus().getState()); + } + } + // we remove SECOND_MODEL from config file and expect to have all versions of it retired + createConfigFileWithContent(relative_config_1_model, fileToReload); + waitForOVMSConfigReload(manager); + models = manager.getModels(); + ASSERT_EQ(models.size(), 2); + for (auto& versionModelInstance : manager.getModels().at(FIRST_MODEL_NAME)->getModelVersions()) { + EXPECT_EQ(ovms::ModelVersionState::AVAILABLE, versionModelInstance.second->getStatus().getState()); + } + for (auto& versionModelInstance : manager.getModels().at(SECOND_MODEL_NAME)->getModelVersions()) { + EXPECT_EQ(ovms::ModelVersionState::END, versionModelInstance.second->getStatus().getState()); + } + manager.join(); +} + class MockModelInstanceInStateWithConfig : public ovms::ModelInstance { static const ovms::model_version_t UNUSED_VERSION = 987789; @@ -1270,7 +1525,7 @@ class MockModelInstanceInStateWithConfig : public ovms::ModelInstance { } }; -std::map> getMockedModelVersionInstances( +static std::map> getMockedModelVersionInstances( std::map initialVersionStates, ov::Core& ieCore, const ovms::ModelConfig& modelConfig = ovms::ModelConfig{}) { @@ -1603,7 +1858,7 @@ TEST_F(ReloadAvailableModelDueToConfigChange, SameConfig_ExpectNoReloads) { TEST_F(ReloadAvailableModelDueToConfigChange, ExpectReloadDueToBasePathChange) { mockModelVersionInstances = getMockedModelVersionInstances(initialVersions, *ieCore, config); - config.setBasePath("new/custom/path"); + config.setBasePath("/new/custom/path"); ovms::ModelManager::getVersionsToChange(config, mockModelVersionInstances, requestedVersions, versionsToStart, versionsToReload, versionsToRetire); EXPECT_THAT(*versionsToReload, UnorderedElementsAre(3)); } @@ -1716,8 +1971,6 @@ TEST_F(ReloadAvailableModelDueToConfigChange, ExpectReloadDueToShapeConfiguratio class GetModelInstanceTest : public ::testing::Test {}; -class MockModel : public ovms::Model {}; - std::shared_ptr model; class MockModelManagerWith1Model : public 
ovms::ModelManager { diff --git a/src/test/modelversionstatus_test.cpp b/src/test/modelversionstatus_test.cpp index d0f8f2d12d..9f8eba20d8 100644 --- a/src/test/modelversionstatus_test.cpp +++ b/src/test/modelversionstatus_test.cpp @@ -19,6 +19,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wall" +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #include "tensorflow_serving/apis/get_model_status.pb.h" #include "tensorflow_serving/apis/model_service.grpc.pb.h" #include "tensorflow_serving/apis/model_service.pb.h" diff --git a/src/test/node_library_manager_test.cpp b/src/test/node_library_manager_test.cpp index 1b737e3a1c..5b40bd3928 100644 --- a/src/test/node_library_manager_test.cpp +++ b/src/test/node_library_manager_test.cpp @@ -28,6 +28,13 @@ TEST(NodeLibraryManagerTest, NewManagerExpectMissingLibrary) { EXPECT_EQ(status, StatusCode::NODE_LIBRARY_MISSING); } +TEST(NodeLibraryManagerTest, UnSuccessfullLibraryLoading) { + CustomNodeLibraryManager manager; + NodeLibrary library; + auto status = manager.loadLibrary("random_name", "ovms/bazel-bin/src/lib_node_mock.so"); + ASSERT_EQ(status, StatusCode::PATH_INVALID); +} + TEST(NodeLibraryManagerTest, SuccessfullLibraryLoadingAndExecution) { CustomNodeLibraryManager manager; NodeLibrary library; diff --git a/src/test/ovinferrequestqueue_test.cpp b/src/test/ovinferrequestqueue_test.cpp index a6017cd4a8..59061820f6 100644 --- a/src/test/ovinferrequestqueue_test.cpp +++ b/src/test/ovinferrequestqueue_test.cpp @@ -47,7 +47,7 @@ TEST(OVInferRequestQueue, ShortQueue) { EXPECT_EQ(reqid, 0); } -void releaseStream(ovms::OVInferRequestsQueue& requestsQueue) { +static void releaseStream(ovms::OVInferRequestsQueue& requestsQueue) { std::this_thread::sleep_for(std::chrono::seconds(1)); requestsQueue.returnStream(3); } diff --git a/src/test/passthrough/1/passthrough.bin b/src/test/passthrough/1/passthrough.bin new file mode 100644 index 0000000000..f76dd238ad Binary files /dev/null and b/src/test/passthrough/1/passthrough.bin differ diff --git a/src/test/passthrough/1/passthrough.xml b/src/test/passthrough/1/passthrough.xml new file mode 100644 index 0000000000..75aecbfb16 --- /dev/null +++ b/src/test/passthrough/1/passthrough.xml @@ -0,0 +1,66 @@ + + + + + + + + -1 + -1 + + + + + + + + 1 + 1 + + + + + + + + -1 + -1 + + + 1 + 1 + + + + + -1 + -1 + + + + + + + -1 + -1 + + + + + + + + + + + + + + + + + + + + + diff --git a/src/test/pipelinedefinitionstatus_test.cpp b/src/test/pipelinedefinitionstatus_test.cpp index e0e7b021f4..578ad38295 100644 --- a/src/test/pipelinedefinitionstatus_test.cpp +++ b/src/test/pipelinedefinitionstatus_test.cpp @@ -31,10 +31,11 @@ using testing::_; using testing::Return; const std::string unusedPipelineName{"UNUSED_PIPELINE_NAME"}; +const std::string unusedPipelineType{"UNUSED_PIPELINE_TYPE"}; const std::string modelNotifyingDetails{"Model:NonExisting version:i^2"}; TEST(PipelineDefinitionStatus, ValidationPassThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -43,7 +44,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenRetire) { } TEST(PipelineDefinitionStatus, ValidationPassThenUsedModelChangeThenPassThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, 
unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -56,7 +57,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenUsedModelChangeThenPassThenReti } TEST(PipelineDefinitionStatus, ValidationFailThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -65,7 +66,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenRetire) { } TEST(PipelineDefinitionStatus, ValidationFailThenUsedModelChangeThriceThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -80,7 +81,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenUsedModelChangeThriceThenRetire } TEST(PipelineDefinitionStatus, ValidationPassThenUsedModelChangeThriceThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -95,7 +96,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenUsedModelChangeThriceThenRetire } TEST(PipelineDefinitionStatus, ValidationPassThenThenRetire) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -104,7 +105,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenThenRetire) { } TEST(PipelineDefinitionStatus, ValidationPassThenThenRetireThenUsedModelChangeShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -114,7 +115,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenThenRetireThenUsedModelChangeSh } TEST(PipelineDefinitionStatus, ValidationPassThenThenRetireThenRetireShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -124,7 +125,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenThenRetireThenRetireShouldThrow } TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenReloadThenValidationPass) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); 
pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -137,7 +138,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenReloadThenValidationP } TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenReloadThenValidationFail) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -150,13 +151,13 @@ TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenReloadThenValidationF } TEST(PipelineDefinitionStatus, RetireShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); ASSERT_THROW(pds.handle(RetireEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, ValidationPassThenValidationPassShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -165,7 +166,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenValidationPassShouldThrow) { } TEST(PipelineDefinitionStatus, ValidationPassThenValidationFailShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -174,7 +175,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenValidationFailShouldThrow) { } TEST(PipelineDefinitionStatus, ValidationFailThenValidationFailShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -183,7 +184,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenValidationFailShouldThrow) { } TEST(PipelineDefinitionStatus, ValidationFailThenValidationPassShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -192,7 +193,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenValidationPassShouldThrow) { } TEST(PipelineDefinitionStatus, ValidationFailThenReloadThenValidationFail) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -203,7 +204,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenReloadThenValidationFail) { } 
TEST(PipelineDefinitionStatus, ValidationFailThenReloadThenValidationPass) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -214,7 +215,7 @@ TEST(PipelineDefinitionStatus, ValidationFailThenReloadThenValidationPass) { } TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenValidationPassShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -225,7 +226,7 @@ TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenValidationPassShouldT } TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenValidationFailShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -235,12 +236,12 @@ TEST(PipelineDefinitionStatus, ValidationPassThenRetireThenValidationFailShouldT ASSERT_THROW(pds.handle(ValidationFailedEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, ReloadFromBeginShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); ASSERT_THROW(pds.handle(ReloadEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, ReloadFromReloadShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -249,12 +250,12 @@ TEST(PipelineDefinitionStatus, ReloadFromReloadShouldThrow) { ASSERT_THROW(pds.handle(ReloadEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, UsedModelChangedFromBeginShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); ASSERT_THROW(pds.handle(UsedModelChangedEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, UsedModelChangedFromReloadShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -263,7 +264,7 @@ TEST(PipelineDefinitionStatus, UsedModelChangedFromReloadShouldThrow) { ASSERT_THROW(pds.handle(UsedModelChangedEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, RetireFromReloadShouldThrow) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), 
ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationPassedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::AVAILABLE); @@ -272,7 +273,7 @@ TEST(PipelineDefinitionStatus, RetireFromReloadShouldThrow) { ASSERT_THROW(pds.handle(RetireEvent()), std::logic_error); } TEST(PipelineDefinitionStatus, ValidationFailFromFailedRequiredRevalidation) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); pds.handle(ValidationFailedEvent()); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); @@ -282,7 +283,7 @@ TEST(PipelineDefinitionStatus, ValidationFailFromFailedRequiredRevalidation) { ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::LOADING_PRECONDITION_FAILED); } TEST(PipelineDefinitionStatus, ConvertToModelStatus) { - PipelineDefinitionStatus pds(unusedPipelineName); + PipelineDefinitionStatus pds(unusedPipelineType, unusedPipelineName); ASSERT_EQ(pds.getStateCode(), ovms::PipelineDefinitionStateCode::BEGIN); ASSERT_EQ((std::tuple(ModelVersionState::LOADING, ModelVersionStatusErrorCode::OK)), pds.convertToModelStatus()); pds.handle(ValidationPassedEvent()); diff --git a/src/test/predict_validation_test.cpp b/src/test/predict_validation_test.cpp index 32a39cab54..2fcf37d99a 100644 --- a/src/test/predict_validation_test.cpp +++ b/src/test/predict_validation_test.cpp @@ -62,11 +62,11 @@ class TfsPredictValidation : public ::testing::Test { preparePredictRequest(request, { {"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, }); // U16 uses int_val instead of tensor_content so it needs separate test @@ -489,9 +489,9 @@ class TfsPredictValidationArbitraryBatchPosition : public TfsPredictValidation { preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, + std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }); } }; @@ -565,9 +565,9 @@ class TfsPredictValidationDynamicModel : public TfsPredictValidation { preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, + std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, }); } }; @@ -640,11 +640,11 @@ class TfsPredictValidationPrecision : public ::testing::TestWithParamsetPrecision(testedPrecision); + mockedInputsInfo[tensorName] = createTensorInfoCopyWithPrecision(mockedInputsInfo[tensorName], testedPrecision); preparePredictRequest(request, { {tensorName, - std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, + std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, }); auto status = ovms::request_validation_utils::validate(request, mockedInputsInfo, "dummy", ovms::model_version_t{1}); EXPECT_EQ(status, 
ovms::StatusCode::OK) << "Precision validation failed:" @@ -689,13 +689,13 @@ class KFSPredictValidation : public ::testing::Test { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}); + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}); } }; @@ -759,7 +759,6 @@ TEST_F(KFSPredictValidation, RequestWrongBatchSizeAuto) { } TEST_F(KFSPredictValidation, ValidRequestBinaryInputs) { - modelConfig.setBatchingParams("auto"); std::string inputName = "Binary_Input"; ::KFSRequest binaryInputRequest; @@ -785,58 +784,6 @@ TEST_F(KFSPredictValidation, ValidRequestBinaryInputs) { EXPECT_TRUE(status.ok()); } -TEST_F(KFSPredictValidation, ValidRequestBinaryInputs_RawInputsContents) { - modelConfig.setBatchingParams("auto"); - std::string inputName = "Binary_Input"; - ::KFSRequest binaryInputRequest; - - auto input = binaryInputRequest.add_inputs(); - input->set_name(inputName); - input->set_datatype("BYTES"); - const int requestBatchSize = 1; - std::string bytes_contents = "BYTES_CONTENTS"; - auto content = binaryInputRequest.add_raw_input_contents(); - *content = bytes_contents; - input->mutable_shape()->Add(requestBatchSize); - - servableInputs.clear(); - ovms::shape_t shape = {1, 3, 224, 224}; - servableInputs[inputName] = std::make_shared( - inputName, - ovms::Precision::FP32, - shape, - ovms::Layout{"NHWC"}); - - auto status = instance->mockValidate(&binaryInputRequest); - EXPECT_TRUE(status.ok()); -} - -TEST_F(KFSPredictValidation, RawInputsContentsBatchSizeBiggerThan1) { - modelConfig.setBatchingParams("auto"); - std::string inputName = "Binary_Input"; - ::KFSRequest binaryInputRequest; - - auto input = binaryInputRequest.add_inputs(); - input->set_name(inputName); - input->set_datatype("BYTES"); - const int requestBatchSize = 2; - std::string bytes_contents = "BYTES_CONTENTS"; - auto content = binaryInputRequest.add_raw_input_contents(); - *content = bytes_contents; - input->mutable_shape()->Add(requestBatchSize); - - servableInputs.clear(); - ovms::shape_t shape = {2, 3, 224, 224}; - servableInputs[inputName] = std::make_shared( - inputName, - ovms::Precision::FP32, - shape, - ovms::Layout{"NHWC"}); - - auto status = instance->mockValidate(&binaryInputRequest); - EXPECT_EQ(status, ovms::StatusCode::INVALID_BATCH_SIZE); -} - TEST_F(KFSPredictValidation, RequestWrongBatchSizeBinaryInputs) { std::string inputName = "Binary_Input"; ::KFSRequest binaryInputRequest; @@ -1062,7 +1009,7 @@ TEST_F(KFSPredictValidationInputTensorContent, RequestInputTensorContentAndRawIn ON_CALL(*instance, getModelConfig()).WillByDefault(ReturnRef(modelConfig)); preparePredictRequest(request, {{inputName, - std::tuple{{1, 2}, testedPrecision}}}, + std::tuple{{1, 2}, testedPrecision}}}, {}, // data, false); // put buffer in InputTensorContent auto buf = findKFSInferInputTensor(request, inputName)->mutable_contents()->mutable_fp32_contents(); @@ -1085,7 +1032,7 @@ TEST_P(KFSPredictValidationInputTensorContent, RequestCorrectContentSizeInputTen ON_CALL(*instance, getModelConfig()).WillByDefault(ReturnRef(modelConfig)); 
preparePredictRequest(request, {{inputName, - std::tuple{{1, 224, 224, 3}, testedPrecision}}}, + std::tuple{{1, 224, 224, 3}, testedPrecision}}}, {}, // data, true); // put buffer in InputTensorContent auto status = instance->mockValidate(&request); @@ -1121,13 +1068,13 @@ class KFSPredictValidationInputTensorContentNegative : public ::testing::Test { preparePredictRequest(request, {{"Input_FP32_1_224_224_3_NHWC", - std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, + std::tuple{{1, 224, 224, 3}, ovms::Precision::FP32}}, {"Input_U8_1_3_62_62_NCHW", - std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, + std::tuple{{1, 3, 62, 62}, ovms::Precision::U8}}, {"Input_I64_1_6_128_128_16_NCDHW", - std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, + std::tuple{{1, 6, 128, 128, 16}, ovms::Precision::I64}}, {"Input_U16_1_2_8_4_NCHW", - std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, + std::tuple{{1, 2, 8, 4}, ovms::Precision::U16}}}, {}, // data, true); // put buffer in InputTensorContent } @@ -1170,6 +1117,184 @@ TEST_F(KFSPredictValidation, RequestNegativeValueInShape) { EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE); } +class KFSPredictValidationRawInputContents : public KFSPredictValidation { +protected: + std::string stringData = "BYTES_CONTENTS"; + uint8_t stringDataSize[4] = {0x0E, 0x00, 0x00, 0x00}; + ::KFSRequest binaryInputRequest; + ::KFSRequest::InferInputTensor* input; + std::string inputName = "Binary_Input"; + std::string* content; + + void SetUp() override { + KFSPredictValidation::SetUp(); + + input = binaryInputRequest.add_inputs(); + input->set_name(inputName); + input->set_datatype("BYTES"); + const int requestBatchSize = 1; + std::string buffer; + buffer.append((char*)stringDataSize, 4); + buffer.append(stringData); + content = binaryInputRequest.add_raw_input_contents(); + *content = buffer; + input->mutable_shape()->Add(requestBatchSize); + + servableInputs.clear(); + } +}; + +TEST_F(KFSPredictValidationRawInputContents, ValidRequest) { + ovms::shape_t shape = {1, 15}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_TRUE(status.ok()); +} + +TEST_F(KFSPredictValidationRawInputContents, ValidRequest_BatchSizeBiggerThan1) { + content->append((char*)stringDataSize, 4); + content->append(stringData); + input->mutable_shape()->Clear(); + input->mutable_shape()->Add(2); + + servableInputs.clear(); + ovms::shape_t shape = {2, 15}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TEST_F(KFSPredictValidationRawInputContents, BatchSizeDoesNotMatchNumberOfStringInBuffer) { + content->append((char*)stringDataSize, 4); + content->append(stringData); + content->append((char*)stringDataSize, 4); + content->append(stringData); + input->mutable_shape()->Clear(); + input->mutable_shape()->Add(2); + + servableInputs.clear(); + ovms::shape_t shape = {1, 15}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::INVALID_BATCH_SIZE); +} + +TEST_F(KFSPredictValidationRawInputContents, InvalidBatchSize) { + content->append((char*)stringDataSize, 4); + content->append(stringData); + 
input->mutable_shape()->Clear(); + input->mutable_shape()->Add(2); + + servableInputs.clear(); + ovms::shape_t shape = {1, 15}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::INVALID_BATCH_SIZE); +} + +TEST_F(KFSPredictValidationRawInputContents, InvalidWidth) { + servableInputs.clear(); + ovms::shape_t shape = {1, 10}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE); +} + +TEST_F(KFSPredictValidationRawInputContents, InvalidBatchSize_BatchSizeChangeRequired) { + modelConfig.setBatchingParams("auto"); + content->append((char*)stringDataSize, 4); + content->append(stringData); + input->mutable_shape()->Clear(); + input->mutable_shape()->Add(2); + + servableInputs.clear(); + ovms::shape_t shape = {1, 15}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::BATCHSIZE_CHANGE_REQUIRED); +} + +TEST_F(KFSPredictValidationRawInputContents, InvalidWidth_ReshapeRequired) { + modelConfig.parseShapeParameter("auto"); + + servableInputs.clear(); + ovms::shape_t shape = {1, 10}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::RESHAPE_REQUIRED); +} + +TEST_F(KFSPredictValidationRawInputContents, InputTooSmall) { + uint8_t invalidBuffer[] = {0x0E, 0x00, 0x00}; + binaryInputRequest.mutable_raw_input_contents()->Clear(); + auto invalidContent = binaryInputRequest.add_raw_input_contents(); + invalidContent->append((char*)invalidBuffer, 3); + + servableInputs.clear(); + ovms::shape_t shape = {1, 3}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::INVALID_INPUT_FORMAT); +} + +TEST_F(KFSPredictValidationRawInputContents, InvalidFormat) { + uint8_t invalidBuffer[] = {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + binaryInputRequest.mutable_raw_input_contents()->Clear(); + auto invalidContent = binaryInputRequest.add_raw_input_contents(); + invalidContent->append((char*)invalidBuffer, 7); + + servableInputs.clear(); + ovms::shape_t shape = {1}; + servableInputs[inputName] = std::make_shared( + inputName, + ovms::Precision::U8, + shape, + ovms::Layout{"NHWC"}); + + auto status = instance->mockValidate(&binaryInputRequest); + EXPECT_EQ(status, ovms::StatusCode::INVALID_INPUT_FORMAT); +} + class KFSPredictValidationArbitraryBatchPosition : public KFSPredictValidation { protected: void SetUp() override { @@ -1185,9 +1310,9 @@ class KFSPredictValidationArbitraryBatchPosition : public KFSPredictValidation { preparePredictRequest(request, { {"Input_FP32_224_224_3_1_HWCN", - std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, + std::tuple{{224, 224, 3, 1}, ovms::Precision::FP32}}, {"Input_U8_3_1_128_CNH", - std::tuple{{3, 1, 128}, ovms::Precision::U8}}, + std::tuple{{3, 1, 128}, ovms::Precision::U8}}, }); } }; @@ -1244,9 +1369,9 @@ class KFSPredictValidationDynamicModel 
: public KFSPredictValidation { preparePredictRequest(request, { {"Input_FP32_any_224:512_224:512_3_NHWC", - std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, + std::tuple{{requestBatchSize, 300, 320, 3}, ovms::Precision::FP32}}, {"Input_U8_100:200_any_CN", - std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, + std::tuple{{101, requestBatchSize}, ovms::Precision::U8}}, }); } }; @@ -1302,11 +1427,11 @@ class KFSPredictValidationPrecision : public ::testing::TestWithParamsetPrecision(testedPrecision); + mockedInputsInfo[tensorName] = createTensorInfoCopyWithPrecision(mockedInputsInfo[tensorName], testedPrecision); preparePredictRequest(request, { {tensorName, - std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, + std::tuple{{1, DUMMY_MODEL_INPUT_SIZE}, testedPrecision}}, }); auto status = ovms::request_validation_utils::validate(request, mockedInputsInfo, "dummy", ovms::model_version_t{1}); EXPECT_EQ(status, ovms::StatusCode::OK) << "Precision validation failed:" @@ -1328,4 +1453,201 @@ INSTANTIATE_TEST_SUITE_P( [](const ::testing::TestParamInfo& info) { return toString(info.param); }); + +static void prepareInferStringInputWithTwoDimensionShapeTensor(::KFSRequest& request, const std::string& name) { + KFSTensorInputProto* tensor = request.add_inputs(); + tensor->set_name(name); + tensor->set_datatype("BYTES"); + tensor->mutable_shape()->Clear(); + tensor->add_shape(1); + tensor->add_shape(1); +} + +static void prepareInferStringInputWithTwoDimensionShapeTensor(tensorflow::serving::PredictRequest& request, const std::string& name) { + request.mutable_inputs()->clear(); + auto& input = (*request.mutable_inputs())[name]; + input.set_dtype(tensorflow::DataType::DT_STRING); + input.mutable_tensor_shape()->add_dim()->set_size(1); + input.mutable_tensor_shape()->add_dim()->set_size(1); +} + +static void prepareInferStringInputWithNegativeShape(::KFSRequest& request, const std::string& name) { + KFSTensorInputProto* tensor = request.add_inputs(); + tensor->set_name(name); + tensor->set_datatype("BYTES"); + tensor->mutable_shape()->Clear(); + tensor->add_shape(-5); +} + +static void prepareInferStringInputWithNegativeShape(tensorflow::serving::PredictRequest& request, const std::string& name) { + request.mutable_inputs()->clear(); + auto& input = (*request.mutable_inputs())[name]; + input.set_dtype(tensorflow::DataType::DT_STRING); + input.mutable_tensor_shape()->add_dim()->set_size(-5); +} + +template +class PredictValidationString2DTest : public ::testing::Test { +protected: + TensorType request; + const char* tensorName = DUMMY_MODEL_INPUT_NAME; + ovms::tensor_map_t mockedInputsInfo; + void SetUp() override { + auto shape2d = ovms::Shape{-1, -1}; + mockedInputsInfo[tensorName] = std::make_shared(tensorName, ovms::Precision::U8, shape2d, ovms::Layout{"NC"}); + } +}; + +using MyTypes = ::testing::Types; +TYPED_TEST_SUITE(PredictValidationString2DTest, MyTypes); + +TYPED_TEST(PredictValidationString2DTest, positive) { + // bs=1 + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); + this->request.Clear(); + // bs=2 + inputStrings = {"String_123", "other"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, 
"dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TYPED_TEST(PredictValidationString2DTest, positive_data_in_buffer) { + if (typeid(TypeParam) == typeid(TFSRequestType)) + GTEST_SKIP() << "String inputs in buffer not supported for TFS api"; + // bs=1 + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings, false); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); + this->request.Clear(); + // bs=2 + inputStrings = {"String_123", "other"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings, false); + status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TYPED_TEST(PredictValidationString2DTest, negative_no_string) { + std::vector inputStrings = {}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE); +} + +TYPED_TEST(PredictValidationString2DTest, negative_over_1gb_after_expansion) { + std::string longString(1024 * 1024 * 512 * 1, 'a'); // 512mb + std::vector inputStrings = {longString, "", ""}; // sum=1.5gb + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_STRING_MAX_SIZE_EXCEEDED); +} + +TYPED_TEST(PredictValidationString2DTest, negative_no_string_in_buffer) { + if (typeid(TypeParam) == typeid(TFSRequestType)) + GTEST_SKIP() << "String inputs in buffer not supported for TFS api"; + std::vector inputStrings = {}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings, false); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE); +} + +TYPED_TEST(PredictValidationString2DTest, negative_shape_has_more_dimensions_than_1) { + prepareInferStringInputWithTwoDimensionShapeTensor(this->request, this->tensorName); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_NO_OF_SHAPE_DIMENSIONS); +} + +TYPED_TEST(PredictValidationString2DTest, negative_shape_has_negative_shape_value) { + prepareInferStringInputWithNegativeShape(this->request, this->tensorName); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_SHAPE); +} + +TYPED_TEST(PredictValidationString2DTest, batchsize_change_required) { + this->mockedInputsInfo[this->tensorName] = std::make_shared(this->tensorName, ovms::Precision::U8, ovms::Shape{3, -1}, ovms::Layout{"NC"}); + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}, {}, ovms::Mode::AUTO); + EXPECT_EQ(status, 
ovms::StatusCode::BATCHSIZE_CHANGE_REQUIRED); +} + +TYPED_TEST(PredictValidationString2DTest, shape_change_required) { + this->mockedInputsInfo[this->tensorName] = std::make_shared(this->tensorName, ovms::Precision::U8, ovms::Shape{-1, 4}, ovms::Layout{"NC"}); + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + ovms::ShapeInfo inputShape{ovms::AUTO, {-1, 4}}; + ovms::shapes_info_map_t shapeMap; + shapeMap[this->tensorName] = inputShape; + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}, {}, ovms::Mode::FIXED, shapeMap); + EXPECT_EQ(status, ovms::StatusCode::RESHAPE_REQUIRED); +} + +TYPED_TEST(PredictValidationString2DTest, string_not_allowed_with_demultiplexer) { + this->mockedInputsInfo[this->tensorName] = this->mockedInputsInfo[this->tensorName]->createCopyWithDemultiplexerDimensionPrefix(ovms::Dimension::any()); + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::NOT_IMPLEMENTED); +} + +template +class PredictValidationString1DTest : public ::testing::Test { +protected: + TensorType request; + const char* tensorName = DUMMY_MODEL_INPUT_NAME; + ovms::tensor_map_t mockedInputsInfo; + void SetUp() override { + auto shape1d = ovms::Shape{-1}; + mockedInputsInfo[tensorName] = std::make_shared(tensorName, ovms::Precision::U8, shape1d, ovms::Layout{"NC"}); + } +}; + +using MyTypes = ::testing::Types; +TYPED_TEST_SUITE(PredictValidationString1DTest, MyTypes); + +TYPED_TEST(PredictValidationString1DTest, positive) { + // bs=1 + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); + // bs=2 + inputStrings = {"String_123", "other"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TYPED_TEST(PredictValidationString1DTest, negative_wrong_request_shape) { + prepareInferStringInputWithTwoDimensionShapeTensor(this->request, this->tensorName); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::INVALID_NO_OF_SHAPE_DIMENSIONS); +} + +TYPED_TEST(PredictValidationString1DTest, positive_over_1gb) { + std::string longString(1024 * 1024 * 512 * 1, 'a'); // 512mb + std::vector inputStrings = {longString, "", ""}; // sum=1.5gb + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::OK); +} + +TYPED_TEST(PredictValidationString1DTest, negative_negative_shape) { + prepareInferStringInputWithNegativeShape(this->request, this->tensorName); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, 
ovms::StatusCode::INVALID_SHAPE); +} + +TYPED_TEST(PredictValidationString1DTest, string_not_allowed_with_demultiplexer) { + this->mockedInputsInfo[this->tensorName] = this->mockedInputsInfo[this->tensorName]->createCopyWithDemultiplexerDimensionPrefix(ovms::Dimension::any()); + std::vector inputStrings = {"String_123"}; + prepareInferStringRequest(this->request, this->tensorName, inputStrings); + auto status = ovms::request_validation_utils::validate(this->request, this->mockedInputsInfo, "dummy", ovms::model_version_t{1}); + EXPECT_EQ(status, ovms::StatusCode::NOT_IMPLEMENTED); +} + #pragma GCC diagnostic pop diff --git a/src/test/prediction_service_test.cpp b/src/test/prediction_service_test.cpp index ab2b7b757d..3a3b2ac57f 100644 --- a/src/test/prediction_service_test.cpp +++ b/src/test/prediction_service_test.cpp @@ -89,7 +89,7 @@ static ovms::Status getOutput(const TFSResponseType& response, const std::string return StatusCode::INVALID_MISSING_INPUT; } -using inputs_info_elem_t = std::pair>; +using inputs_info_elem_t = std::pair>; static size_t calculateByteSize(const inputs_info_elem_t& e) { auto& [inputName, shapeDatatypeTuple] = e; auto& [shape, precision] = shapeDatatypeTuple; @@ -154,7 +154,7 @@ class TestPredict : public ::testing::Test { Preparer preparer; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{(initialBatchSize + (i % 3)), 10}, ovms::Precision::FP32}}}); + std::tuple{{(initialBatchSize + (i % 3)), 10}, ovms::Precision::FP32}}}); performPredict(config.getName(), config.getVersion(), request, std::move(std::make_unique>(releaseWaitBeforeGettingModelInstance[i].get_future()))); @@ -168,7 +168,7 @@ class TestPredict : public ::testing::Test { Preparer preparer; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{initialBatchSize, 10}, ovms::Precision::FP32}}}); + std::tuple{{initialBatchSize, 10}, ovms::Precision::FP32}}}); performPredict(config.getName(), config.getVersion(), request, nullptr, std::move(std::make_unique>(releaseWaitBeforePerformInference[i].get_future()))); @@ -205,7 +205,7 @@ class TestPredict : public ::testing::Test { Preparer preparer; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{(initialBatchSize + i), 10}, ovms::Precision::FP32}}}); + std::tuple{{(initialBatchSize + i), 10}, ovms::Precision::FP32}}}); performPredict(config.getName(), config.getVersion(), request, std::move(std::make_unique>(releaseWaitBeforeGettingModelInstance[i].get_future()))); })); @@ -221,8 +221,17 @@ class TestPredict : public ::testing::Test { } } - void checkOutputShape(const ResponseType& response, const signed_shape_t& shape, const std::string& outputName = "a"); + void checkOutputShape(const ResponseType& response, const ovms::signed_shape_t& shape, const std::string& outputName = "a"); + static void checkOutputValuesU8(const TFSResponseType& response, const std::vector& expectedValues, const std::string& outputName = INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME, bool checkRaw = true) { + ASSERT_EQ(response.outputs().count(outputName), 1); + const auto& output_tensor = response.outputs().at(outputName); + uint8_t* buffer = reinterpret_cast(const_cast(output_tensor.tensor_content().data())); + std::vector actualValues(buffer, buffer + output_tensor.tensor_content().size() / sizeof(uint8_t)); + ASSERT_EQ(actualValues.size(), expectedValues.size()); + ASSERT_EQ(0, std::memcmp(actualValues.data(), expectedValues.data(), expectedValues.size() * sizeof(uint8_t))) + << 
readableError(expectedValues.data(), actualValues.data(), expectedValues.size() * sizeof(uint8_t)); + } static void checkOutputValues(const TFSResponseType& response, const std::vector& expectedValues, const std::string& outputName = INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME) { ASSERT_EQ(response.outputs().count(outputName), 1); const auto& output_tensor = response.outputs().at(outputName); @@ -231,6 +240,9 @@ class TestPredict : public ::testing::Test { ASSERT_EQ(0, std::memcmp(actualValues.data(), expectedValues.data(), expectedValues.size() * sizeof(float))) << readableError(expectedValues.data(), actualValues.data(), expectedValues.size() * sizeof(float)); } + static void checkOutputValuesU8(const ovms::InferenceResponse& res, const std::vector& expectedValues, const std::string& outputName = INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME, bool checkRaw = true) { + FAIL() << "not supported"; + } static void checkOutputValues(const ovms::InferenceResponse& res, const std::vector& expectedValues, const std::string& outputName = INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME) { InferenceResponse& response = const_cast(res); size_t outputCount = response.getOutputCount(); @@ -276,6 +288,27 @@ class TestPredict : public ::testing::Test { } } } + static void checkOutputValuesU8(const KFSResponse& response, const std::vector& expectedValues, const std::string& outputName = INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME, bool checkRaw = true) { + KFSOutputTensorIteratorType it; + size_t bufferId; + auto status = getOutput(response, outputName, it, bufferId); + ASSERT_TRUE(status.ok()) << "Couln't find output:" << outputName; + if (checkRaw) { + ASSERT_GT(response.raw_output_contents().size(), 0); + uint8_t* buffer = reinterpret_cast(const_cast(response.raw_output_contents(bufferId).data())); + ASSERT_EQ(response.raw_output_contents(bufferId).size(), expectedValues.size()); + ASSERT_EQ(0, std::memcmp(buffer, expectedValues.data(), expectedValues.size() * sizeof(uint8_t))) + << readableError(expectedValues.data(), buffer, expectedValues.size() * sizeof(uint8_t)); + } else { + auto& responseOutput = *it; + ASSERT_EQ(responseOutput.datatype(), "UINT8") << "other precision testing not supported"; + ASSERT_EQ(expectedValues.size(), responseOutput.contents().uint_contents().size()); + for (size_t i = 0; i < expectedValues.size(); i++) { + ASSERT_EQ(expectedValues[i], responseOutput.contents().uint_contents(i)) + << "Wrong value at index " << i << ", expected: " << expectedValues[i] << " actual: " << responseOutput.contents().uint_contents(i); + } + } + } ovms::Status performInferenceWithRequest(const RequestType& request, ResponseType& response, const std::string& servableName = "dummy") { std::shared_ptr model; @@ -288,33 +321,33 @@ class TestPredict : public ::testing::Test { return model->infer(&request, &response, unload_guard); } - ovms::Status performInferenceWithShape(ResponseType& response, const signed_shape_t& shape = {1, 10}, const ovms::Precision precision = ovms::Precision::FP32) { + ovms::Status performInferenceWithShape(ResponseType& response, const ovms::signed_shape_t& shape = {1, 10}, const ovms::Precision precision = ovms::Precision::FP32) { RequestType request; Preparer preparer; preparer.preparePredictRequest(request, - {{DUMMY_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); + {{DUMMY_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); return performInferenceWithRequest(request, response); } ovms::Status performInferenceWithBatchSize(ResponseType& response, int batchSize = 1, const ovms::Precision precision = 
ovms::Precision::FP32, const size_t batchSizePosition = 0) { - signed_shape_t shape = {1, 10}; + ovms::signed_shape_t shape = {1, 10}; shape[batchSizePosition] = batchSize; RequestType request; Preparer preparer; preparer.preparePredictRequest(request, - {{DUMMY_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); + {{DUMMY_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); return performInferenceWithRequest(request, response); } - ovms::Status performInferenceWithImageInput(ResponseType& response, const signed_shape_t& shape, const std::vector& data = {}, const std::string& servableName = "increment_1x3x4x5", int batchSize = 1, const ovms::Precision precision = ovms::Precision::FP32) { + ovms::Status performInferenceWithImageInput(ResponseType& response, const ovms::signed_shape_t& shape, const std::vector& data = {}, const std::string& servableName = "increment_1x3x4x5", int batchSize = 1, const ovms::Precision precision = ovms::Precision::FP32) { RequestType request; Preparer preparer; if (data.size()) { preparePredictRequest(request, - {{INCREMENT_1x3x4x5_MODEL_INPUT_NAME, std::tuple{shape, precision}}}, data); + {{INCREMENT_1x3x4x5_MODEL_INPUT_NAME, std::tuple{shape, precision}}}, data); } else { preparer.preparePredictRequest(request, - {{INCREMENT_1x3x4x5_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); + {{INCREMENT_1x3x4x5_MODEL_INPUT_NAME, std::tuple{shape, precision}}}); } return performInferenceWithRequest(request, response, servableName); } @@ -334,7 +367,7 @@ class TestPredict : public ::testing::Test { }; template <> -void TestPredict::checkOutputShape(const TFSResponseType& response, const signed_shape_t& shape, const std::string& outputName) { +void TestPredict::checkOutputShape(const TFSResponseType& response, const ovms::signed_shape_t& shape, const std::string& outputName) { ASSERT_EQ(response.outputs().count(outputName), 1); const auto& output_tensor = response.outputs().at(outputName); ASSERT_EQ(output_tensor.tensor_shape().dim_size(), shape.size()); @@ -344,7 +377,7 @@ void TestPredict::checkOutputShape(const TFSResponseType& response } template <> -void TestPredict::checkOutputShape(const ovms::InferenceResponse& cresponse, const signed_shape_t& shape, const std::string& outputName) { +void TestPredict::checkOutputShape(const ovms::InferenceResponse& cresponse, const ovms::signed_shape_t& shape, const std::string& outputName) { size_t outputCount = cresponse.getOutputCount(); EXPECT_GE(1, outputCount); size_t outputId = 0; @@ -369,7 +402,7 @@ void TestPredict::checkOutputShape(const ovms::InferenceResponse& } template <> -void TestPredict::checkOutputShape(const KFSResponse& response, const signed_shape_t& shape, const std::string& outputName) { +void TestPredict::checkOutputShape(const KFSResponse& response, const ovms::signed_shape_t& shape, const std::string& outputName) { auto it = response.outputs().begin(); size_t bufferId; auto status = getOutput(response, outputName, it, bufferId); @@ -485,7 +518,7 @@ TYPED_TEST(TestPredict, SuccesfullOnDummyModel) { Preparer preparer; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; config.setBatchSize(1); @@ -582,7 +615,7 @@ TYPED_TEST(TestPredictWithMapping, SuccesfullOnDummyModelWithMapping) { typename TypeParam::first_type request; preparer.preparePredictRequest(request, {{this->dummyModelInputMapping, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + 
std::tuple{{1, 10}, ovms::Precision::FP32}}}); this->SetUp(oneDummyWithMappedInputConfig); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; ConstructorEnabledModelManager manager; @@ -591,12 +624,52 @@ TYPED_TEST(TestPredictWithMapping, SuccesfullOnDummyModelWithMapping) { performPrediction(config.getName(), config.getVersion(), request, nullptr, nullptr, manager, this->dummyModelInputMapping, this->dummyModelOutputMapping); } +TYPED_TEST(TestPredictWithMapping, SuccesfullOnPassthrough_2D_U8ModelWithMapping) { + if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest)) + GTEST_SKIP() << "String inputs not supported for C-API"; + this->modelPath = this->directoryPath + "/passthrough/"; + this->mappingConfigPath = this->modelPath + "1/mapping_config.json"; + std::filesystem::copy("/ovms/src/test/passthrough", this->modelPath, std::filesystem::copy_options::recursive); + this->ovmsConfig = R"( +{ + "model_config_list": [ + { + "config": { + "name": "passhtrough_u8", + "base_path": "/ovms/src/test/passthrough" + } + } + ] +})"; + const std::string modelPathToReplace{"/ovms/src/test/passthrough"}; + auto it = this->ovmsConfig.find(modelPathToReplace); + ASSERT_NE(it, std::string::npos); + this->ovmsConfig.replace(this->ovmsConfig.find(modelPathToReplace), modelPathToReplace.size(), this->modelPath); + this->configFilePath = this->directoryPath + "/ovms_config.json"; + createConfigFileWithContent(this->ovmsConfig, this->configFilePath); + createConfigFileWithContent(R"({ + "outputs": {"copy:0": "copy:0_string"} + })", + this->mappingConfigPath); + ConstructorEnabledModelManager manager; + auto status = manager.loadConfig(this->configFilePath); + ASSERT_EQ(status, ovms::StatusCode::OK) << status.string(); + typename TypeParam::first_type request; + prepareInferStringRequest(request, PASSTHROUGH_MODEL_INPUT_NAME, {"String_123", "", "zebra"}); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(manager.getModelInstance("passhtrough_u8", 1, modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + typename TypeParam::second_type response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + assertStringResponse(response, {"String_123", "", "zebra"}, "copy:0_string"); +} + TYPED_TEST(TestPredictWithMapping, SuccesfullOnDummyModelWithMappingSpecificShapeAuto) { Preparer preparer; typename TypeParam::first_type request; preparer.preparePredictRequest(request, {{this->dummyModelInputMapping, - std::tuple{{1, 5}, ovms::Precision::FP32}}}); + std::tuple{{1, 5}, ovms::Precision::FP32}}}); this->SetUp(oneDummyWithMappedInputSpecificAutoShapeConfig); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; auto status = config.parseShapeParameter("auto"); @@ -610,7 +683,7 @@ TYPED_TEST(TestPredictWithMapping, SuccesfullOnDummyModelWithMappingAnonymousSha typename TypeParam::first_type request; preparer.preparePredictRequest(request, {{this->dummyModelInputMapping, - std::tuple{{1, 5}, ovms::Precision::FP32}}}); + std::tuple{{1, 5}, ovms::Precision::FP32}}}); this->SetUp(oneDummyWithMappedInputAnonymousAutoShapeConfig); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; auto status = config.parseShapeParameter("auto"); @@ -625,7 +698,7 @@ TYPED_TEST(TestPredict, SuccesfullReloadFromAlreadyLoadedWithNewBatchSize) { typename TypeParam::first_type request; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, 
ovms::Precision::FP32}}}); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; const auto initialBatchSize = config.getBatchSize(); config.setBatchSize(initialBatchSize); @@ -639,11 +712,11 @@ TYPED_TEST(TestPredict, SuccesfullReloadWhen1InferenceInProgress) { typename TypeParam::first_type requestBs1; preparer.preparePredictRequest(requestBs1, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); typename TypeParam::first_type requestBs2; preparer.preparePredictRequest(requestBs2, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{2, 10}, ovms::Precision::FP32}}}); + std::tuple{{2, 10}, ovms::Precision::FP32}}}); this->config.setBatchingParams("auto"); this->config.setNireq(2); @@ -674,11 +747,11 @@ TYPED_TEST(TestPredict, SuccesfullReloadWhen1InferenceAboutToStart) { typename TypeParam::first_type requestBs2; preparer.preparePredictRequest(requestBs2, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{2, 10}, ovms::Precision::FP32}}}); + std::tuple{{2, 10}, ovms::Precision::FP32}}}); typename TypeParam::first_type requestBs1; preparer.preparePredictRequest(requestBs1, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}); + std::tuple{{1, 10}, ovms::Precision::FP32}}}); this->config.setBatchingParams("auto"); this->config.setNireq(2); @@ -749,7 +822,7 @@ TYPED_TEST(TestPredict, SuccesfullReshapeViaRequestOnDummyModel) { typename TypeParam::first_type request; preparer.preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 5}, ovms::Precision::FP32}}}); + std::tuple{{1, 5}, ovms::Precision::FP32}}}); typename TypeParam::second_type response; @@ -810,7 +883,7 @@ TYPED_TEST(TestPredict, ReshapeViaRequestAndConfigChange) { } /* * Scenario - perform inferences with different batch size and model reload via this->config.json change - * + * * 1. Load model with bs=auto, initial internal shape (1,10) * 2. Do the inference with (3,10) shape - expect status OK and result (3,10) * 3. Change model batch size to fixed=4 with this->config.json change @@ -854,7 +927,7 @@ TYPED_TEST(TestPredict, ChangeBatchSizeViaRequestAndConfigChange) { /* * Scenario - perform inference with NHWC input layout changed via this->config.json. - * + * * 1. Load model with layout=nhwc:nchw, initial internal layout: nchw, initial shape=(1,3,4,5) * 2. Do the inference with (1,4,5,3) shape - expect status OK and result (1,3,4,5) * 3. Do the inference with (1,3,4,5) shape - expect INVALID_SHAPE @@ -907,7 +980,7 @@ TYPED_TEST(TestPredict, PerformInferenceChangeModelInputLayout) { } /* * Scenario - perform inference with NHWC input layout changed and shape changed via this->config.json. - * + * * 1. Load model with layout=nchw:nhwc and shape=(1,1,2,3), initial internal layout: nchw, initial shape=(1,3,4,5) * 2. Do the inference with (1,1,2,3) shape - expect status OK and result (1,3,1,2) * 3. Do the inference with (1,3,1,2) shape - expect INVALID_SHAPE @@ -1208,7 +1281,7 @@ TYPED_TEST(TestPredict, PerformInferenceWithBinaryInputBatchSizeAuto) { * * 1. Load model with input layout=nhwc, batch_size=auto, initial internal layout: nchw, batch_size=1 * 2. 
Do the inference with binary image tensor with no shape set - expect status INVALID_NO_OF_SHAPE_DIMENSIONS -*/ + */ TYPED_TEST(TestPredict, PerformInferenceWithBinaryInputNoInputShape) { if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest)) GTEST_SKIP() << "Binary inputs not implemented for C-API yet"; @@ -1231,7 +1304,7 @@ TYPED_TEST(TestPredict, PerformInferenceWithBinaryInputNoInputShape) { /* * Scenario - perform inference with with batch size set to auto and batch size not matching on position other than first - * + * * 1. Load model with bs=auto, layout=b=>cn,a=>cn initial internal shape (1,10) * 2. Do the inference with (1,30) shape - expect status OK and result (1,30) * 3. Change model batch size to fixed=4 with config.json change @@ -1287,7 +1360,7 @@ TYPED_TEST(TestPredict, ChangeBatchSizeViaRequestAndConfigChangeArbitraryPositio * 1. Load model with input shape (-1, -1) * 2. Do the inference with (3, 2) shape, expect correct output shape * 3. Do the inference with (1, 4) shape, expect correct output shape -*/ + */ TYPED_TEST(TestPredict, PerformInferenceDummyAllDimensionsAny) { using namespace ovms; @@ -1312,7 +1385,7 @@ TYPED_TEST(TestPredict, PerformInferenceDummyAllDimensionsAny) { * * 1. Load model with input shape (-1, 10) * 2. Do the X inferences with (x, 10) shape, expect correct output shapes. x=[1, 3, 5, 7, 11, 17, 21, 57, 99] -*/ + */ TYPED_TEST(TestPredict, PerformInferenceDummyBatchSizeAny) { using namespace ovms; @@ -1332,7 +1405,7 @@ TYPED_TEST(TestPredict, PerformInferenceDummyBatchSizeAny) { * * 1. Load model with input shape (-1, 10) * 2. Do the inferences with (3, 10) shape, expect correct output shapes and precision -*/ + */ static ovms::Precision getPrecisionFromResponse(ovms::InferenceResponse& response, const std::string& name) { size_t outputCount = response.getOutputCount(); @@ -1381,7 +1454,7 @@ TYPED_TEST(TestPredict, PerformInferenceDummyFp64) { typename TypeParam::second_type response; Preparer preparer; - preparer.preparePredictRequest(request, {{"input:0", std::tuple{{3, 10}, ovms::Precision::FP64}}}); + preparer.preparePredictRequest(request, {{"input:0", std::tuple{{3, 10}, ovms::Precision::FP64}}}); ASSERT_EQ(this->performInferenceWithRequest(request, response, "dummy_fp64"), ovms::StatusCode::OK); this->checkOutputShape(response, {3, 10}, "output:0"); ASSERT_EQ(getPrecisionFromResponse(response, "output:0"), ovms::Precision::FP64); @@ -1397,7 +1470,7 @@ TYPED_TEST(TestPredict, PerformInferenceDummyFp64) { * 5. Do the inference with (3, 5) shape, expect success and correct output shape * 6. Do the inference with (3, 6) shape, expect not in range * 7. 
Do the inference with (5, 5) shape, expect not in range -*/ + */ TYPED_TEST(TestPredict, PerformInferenceDummyAllDimensionsHaveRange) { using namespace ovms; @@ -1540,7 +1613,7 @@ TYPED_TEST(TestPredict, InferenceWithNegativeShape) { int64_t negativeBatch = -5; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{negativeBatch, 10}, ovms::Precision::FP32}}}, + std::tuple{{negativeBatch, 10}, ovms::Precision::FP32}}}, data); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; config.setBatchSize(1); @@ -1559,7 +1632,7 @@ TYPED_TEST(TestPredict, InferenceWithNegativeShapeDynamicParameter) { int64_t negativeBatch = -5; preparePredictRequest(request, {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{negativeBatch, 10}, ovms::Precision::FP32}}}, + std::tuple{{negativeBatch, 10}, ovms::Precision::FP32}}}, data); ovms::ModelConfig config = DUMMY_MODEL_CONFIG; config.setBatchingParams("auto"); @@ -1572,4 +1645,154 @@ TYPED_TEST(TestPredict, InferenceWithNegativeShapeDynamicParameter) { ASSERT_NE(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); } +TYPED_TEST(TestPredict, InferenceWithStringInputs_positive_2D) { + if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest)) + GTEST_SKIP() << "String inputs not supported for C-API"; + typename TypeParam::first_type request; + std::vector inputStrings = {"String_123", "String"}; + prepareInferStringRequest(request, PASSTHROUGH_MODEL_INPUT_NAME, inputStrings); + ovms::ModelConfig config = PASSTHROUGH_MODEL_CONFIG; + config.setBatchingParams("auto"); + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + typename TypeParam::second_type response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + this->checkOutputShape(response, {2, 11}, PASSTHROUGH_MODEL_OUTPUT_NAME); + std::vector expectedData = { + 'S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, + 'S', 't', 'r', 'i', 'n', 'g', 0, 0, 0, 0, 0}; + bool checkRaw = true; + this->checkOutputValuesU8(response, expectedData, PASSTHROUGH_MODEL_OUTPUT_NAME, checkRaw); +} + +TYPED_TEST(TestPredict, InferenceWithStringInputs_positive_2D_data_in_buffer) { + if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest) || typeid(typename TypeParam::first_type) == typeid(TFSRequestType)) + GTEST_SKIP() << "String inputs in buffer not supported for C-API and TFS api"; + typename TypeParam::first_type request; + std::vector inputStrings = {"String_123", "String"}; + prepareInferStringRequest(request, PASSTHROUGH_MODEL_INPUT_NAME, inputStrings, false); + ovms::ModelConfig config = PASSTHROUGH_MODEL_CONFIG; + config.setBatchingParams("auto"); + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + typename TypeParam::second_type response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + this->checkOutputShape(response, {2, 11}, PASSTHROUGH_MODEL_OUTPUT_NAME); + std::vector expectedData = { + 'S', 't', 'r', 'i', 'n', 
'g', '_', '1', '2', '3', 0, + 'S', 't', 'r', 'i', 'n', 'g', 0, 0, 0, 0, 0}; + bool checkRaw = true; + this->checkOutputValuesU8(response, expectedData, PASSTHROUGH_MODEL_OUTPUT_NAME, checkRaw); +} + +TYPED_TEST(TestPredict, InferenceWithStringInputs_positive_1D) { + if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest)) + GTEST_SKIP() << "String inputs not supported for C-API"; + typename TypeParam::first_type request; + std::vector inputStrings = {"ala", "", "ma", "kota"}; + prepareInferStringRequest(request, PASSTHROUGH_MODEL_INPUT_NAME, inputStrings); + ovms::ModelConfig config = PASSTHROUGH_MODEL_CONFIG; + config.setBatchingParams("0"); + config.parseShapeParameter("(-1)"); + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + typename TypeParam::second_type response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + this->checkOutputShape(response, {1, 33}, PASSTHROUGH_MODEL_OUTPUT_NAME); + std::vector expectedData = { + 4, 0, 0, 0, // batch size + 0, 0, 0, 0, // first string start offset + 3, 0, 0, 0, // end of "ala" in condensed content + 3, 0, 0, 0, // end of "" in condensed content + 5, 0, 0, 0, // end of "ma" in condensed content + 9, 0, 0, 0, // end of "kota" in condensed content + 'a', 'l', 'a', + 'm', 'a', + 'k', 'o', 't', 'a'}; + bool checkRaw = true; + this->checkOutputValuesU8(response, expectedData, PASSTHROUGH_MODEL_OUTPUT_NAME, checkRaw); +} + +TYPED_TEST(TestPredict, InferenceWithStringInputs_positive_1D_data_in_buffer) { + if (typeid(typename TypeParam::first_type) == typeid(ovms::InferenceRequest) || typeid(typename TypeParam::first_type) == typeid(TFSRequestType)) + GTEST_SKIP() << "String inputs in buffer not supported for C-API and TFS api"; + typename TypeParam::first_type request; + std::vector inputStrings = {"ala", "", "ma", "kota"}; + prepareInferStringRequest(request, PASSTHROUGH_MODEL_INPUT_NAME, inputStrings, false); + ovms::ModelConfig config = PASSTHROUGH_MODEL_CONFIG; + config.setBatchingParams("0"); + config.parseShapeParameter("(-1)"); + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + typename TypeParam::second_type response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + this->checkOutputShape(response, {1, 33}, PASSTHROUGH_MODEL_OUTPUT_NAME); + std::vector expectedData = { + 4, 0, 0, 0, // batch size + 0, 0, 0, 0, // first string start offset + 3, 0, 0, 0, // end of "ala" in condensed content + 3, 0, 0, 0, // end of "" in condensed content + 5, 0, 0, 0, // end of "ma" in condensed content + 9, 0, 0, 0, // end of "kota" in condensed content + 'a', 'l', 'a', + 'm', 'a', + 'k', 'o', 't', 'a'}; + bool checkRaw = true; + this->checkOutputValuesU8(response, expectedData, PASSTHROUGH_MODEL_OUTPUT_NAME, checkRaw); +} + +class TestPredictKFS : public TestPredict {}; + +TEST_F(TestPredictKFS, RequestDataInFp32ContentResponseInRaw) { + KFSRequest request; + std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; 
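+    // Descriptive note (editorial addition, not part of the original change): the KServe v2 API
+    // lets a request carry tensor data either in the typed InferTensorContents fields
+    // (fp32_contents here) or in raw_input_contents. This test exercises the typed-content
+    // request path and the following test exercises the raw path; both expect the response
+    // data to come back in raw_output_contents rather than in the typed contents field.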
+ bool putBufferInInputTensorContent = true; // put in fp32_content + preparePredictRequest(request, + {{DUMMY_MODEL_INPUT_NAME, + std::tuple{{1, 10}, ovms::Precision::FP32}}}, + data, + putBufferInInputTensorContent); + ovms::ModelConfig config = DUMMY_MODEL_CONFIG; + + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + KFSResponse response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + ASSERT_EQ(response.outputs_size(), 1); + ASSERT_FALSE(response.outputs(0).has_contents()); + ASSERT_GT(response.raw_output_contents_size(), 0); +} + +TEST_F(TestPredictKFS, RequestDataInRawResponseInRaw) { + KFSRequest request; + std::vector data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + bool putBufferInInputTensorContent = false; // put in raw + preparePredictRequest(request, + {{DUMMY_MODEL_INPUT_NAME, + std::tuple{{1, 10}, ovms::Precision::FP32}}}, + data, + putBufferInInputTensorContent); + ovms::ModelConfig config = DUMMY_MODEL_CONFIG; + + ASSERT_EQ(this->manager.reloadModelWithVersions(config), ovms::StatusCode::OK_RELOADED); + std::shared_ptr modelInstance; + std::unique_ptr modelInstanceUnloadGuard; + ASSERT_EQ(this->manager.getModelInstance(config.getName(), config.getVersion(), modelInstance, modelInstanceUnloadGuard), ovms::StatusCode::OK); + KFSResponse response; + ASSERT_EQ(modelInstance->infer(&request, &response, modelInstanceUnloadGuard), ovms::StatusCode::OK); + ASSERT_EQ(response.outputs_size(), 1); + ASSERT_FALSE(response.outputs(0).has_contents()); + ASSERT_GT(response.raw_output_contents_size(), 0); +} + #pragma GCC diagnostic pop diff --git a/src/test/rest_utils_test.cpp b/src/test/rest_utils_test.cpp index 608a95f371..52e78a6cb3 100644 --- a/src/test/rest_utils_test.cpp +++ b/src/test/rest_utils_test.cpp @@ -66,6 +66,52 @@ class TFSMakeJsonFromPredictResponseRawTest : public ::testing::TestWithParamset_dtype(tensorflow::DataType::DT_STRING); + } +}; + +TEST_F(TFSMakeJsonFromPredictResponseStringTest, PositiveRow) { + output1->add_string_val("Hello"); + output1->mutable_tensor_shape()->add_dim()->set_size(1); + std::string expected_json = "{\n \"predictions\": [\"Hello\"\n ]\n}"; + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, Order::ROW), StatusCode::OK); + EXPECT_EQ(json, expected_json); +} + +TEST_F(TFSMakeJsonFromPredictResponseStringTest, PositiveRowBatchSize2) { + output1->add_string_val("Hello"); + output1->add_string_val("World"); + output1->mutable_tensor_shape()->add_dim()->set_size(2); + std::string expected_json = "{\n \"predictions\": [\"Hello\", \"World\"\n ]\n}"; + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, Order::ROW), StatusCode::OK); + EXPECT_EQ(json, expected_json); +} + +TEST_F(TFSMakeJsonFromPredictResponseStringTest, PositiveColumn) { + output1->add_string_val("Hello"); + output1->mutable_tensor_shape()->add_dim()->set_size(1); + std::string expected_json = "{\n \"outputs\": [\n \"Hello\"\n ]\n}"; + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, Order::COLUMN), StatusCode::OK); + EXPECT_EQ(json, expected_json); +} + +TEST_F(TFSMakeJsonFromPredictResponseStringTest, PositiveColumnBatchSize2) { + output1->add_string_val("Hello"); + output1->add_string_val("World"); + output1->mutable_tensor_shape()->add_dim()->set_size(2); + 
std::string expected_json = "{\n \"outputs\": [\n \"Hello\",\n \"World\"\n ]\n}"; + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, Order::COLUMN), StatusCode::OK); + EXPECT_EQ(json, expected_json); +} + TEST_F(TFSMakeJsonFromPredictResponseRawTest, CannotConvertUnknownOrder) { EXPECT_EQ(makeJsonFromPredictResponse(proto, &json, Order::UNKNOWN), StatusCode::REST_PREDICT_UNKNOWN_ORDER); } @@ -128,7 +174,7 @@ const char* rawPositiveFirstOrderResponseColumn = R"({ } })"; -std::string getJsonResponseDependsOnOrder(ovms::Order order, const char* rowOrderResponse, const char* columnOrderResponse) { +static std::string getJsonResponseDependsOnOrder(ovms::Order order, const char* rowOrderResponse, const char* columnOrderResponse) { switch (order) { case Order::ROW: return rowOrderResponse; @@ -236,7 +282,7 @@ TEST_P(TFSMakeJsonFromPredictResponseRawTest, Positive_Noname) { std::vector SupportedOrders = {Order::ROW, Order::COLUMN}; -std::string toString(ovms::Order order) { +static std::string toString(ovms::Order order) { switch (order) { case Order::ROW: return "ROW"; @@ -707,40 +753,6 @@ TEST_F(KFSMakeJsonFromPredictResponseRawTest, Positive) { })"); } -TEST_F(KFSMakeJsonFromPredictResponseRawTest, Positive_binary) { - int output2DataSize = 10 * sizeof(int8_t); - output2->set_datatype("BYTES"); - output2->mutable_shape()->Clear(); - output2->mutable_shape()->Add(1); - output2->mutable_shape()->Add(output2DataSize); - - ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength), StatusCode::OK); - ASSERT_EQ(inferenceHeaderContentLength.has_value(), true); - std::string expectedJson = R"({ - "model_name": "model", - "id": "id", - "outputs": [{ - "name": "output1", - "shape": [2, 1, 4], - "datatype": "FP32", - "data": [5.0, 10.0, -3.0, 2.5, 9.0, 55.5, -0.5, -1.5] - }, { - "name": "output2", - "shape": [1, 10], - "datatype": "BYTES", - "parameters": { - "binary_data_size": 10 - } - }] -})"; - ASSERT_EQ(inferenceHeaderContentLength.value(), expectedJson.size()); - ASSERT_EQ(json.size(), expectedJson.size() + output2DataSize); - EXPECT_EQ(json.substr(0, inferenceHeaderContentLength.value()), expectedJson); - for (int i = 0; i < output2DataSize; i++) { - EXPECT_EQ((int)json.substr(inferenceHeaderContentLength.value())[i], data2[i]); - } -} - TEST_F(KFSMakeJsonFromPredictResponseRawTest, EmptyRawOutputContentsError) { proto.mutable_raw_output_contents()->Clear(); EXPECT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength), StatusCode::REST_SERIALIZE_NO_DATA); @@ -1109,59 +1121,58 @@ TEST_F(KFSMakeJsonFromPredictResponsePrecisionTest, Uint64_binary) { assertDataBinary(data, expectedJson); } -TEST_F(KFSMakeJsonFromPredictResponsePrecisionTest, BYTES_1) { - int8_t data = -53; +TEST_F(KFSMakeJsonFromPredictResponsePrecisionTest, raw_BYTES_string) { + uint8_t data[] = {4, 0, 0, 0, 'a', 'b', 'c', 'd', 3, 0, 0, 0, 'e', 'f', 'g'}; + int dataSize = 15; output->set_datatype("BYTES"); + output->mutable_shape()->Clear(); + output->mutable_shape()->Add(2); // batch size auto* output_contents = proto.add_raw_output_contents(); - output_contents->assign(reinterpret_cast(&data), sizeof(int8_t)); - ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength), StatusCode::OK); - ASSERT_EQ(inferenceHeaderContentLength.has_value(), true); + output_contents->assign(reinterpret_cast(&data), dataSize); + auto status = makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength); + ASSERT_EQ(status, StatusCode::OK) << status.string(); std::string 
expectedJson = R"({ "model_name": "model", "id": "id", "outputs": [{ "name": "output", - "shape": [1, 1], + "shape": [2], "datatype": "BYTES", - "parameters": { - "binary_data_size": 1 - } + "data": ["abcd", "efg"] }] })"; - ASSERT_EQ(inferenceHeaderContentLength.value(), expectedJson.size()); - ASSERT_EQ(json.size(), expectedJson.size() + sizeof(int8_t)); - EXPECT_EQ(json.substr(0, inferenceHeaderContentLength.value()), expectedJson); - EXPECT_EQ((int)json.substr(inferenceHeaderContentLength.value())[0], data); + SPDLOG_INFO(json); + ASSERT_EQ(json.size(), expectedJson.size()); + ASSERT_EQ(json, expectedJson); } -TEST_F(KFSMakeJsonFromPredictResponsePrecisionTest, BYTES_2) { - int8_t data[] = {-53, 1, 2, 3}; - int dataSize = 4; +TEST_F(KFSMakeJsonFromPredictResponsePrecisionTest, raw_BYTES_string_binary) { + uint8_t data[] = {4, 0, 0, 0, 'a', 'b', 'c', 'd', 3, 0, 0, 0, 'e', 'f', 'g'}; + int dataSize = 15; output->set_datatype("BYTES"); output->mutable_shape()->Clear(); - output->mutable_shape()->Add(1); - output->mutable_shape()->Add(dataSize); + output->mutable_shape()->Add(2); // batch size auto* output_contents = proto.add_raw_output_contents(); output_contents->assign(reinterpret_cast(&data), dataSize); - ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength), StatusCode::OK); - ASSERT_EQ(inferenceHeaderContentLength.has_value(), true); + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength, {"output"}), StatusCode::OK); std::string expectedJson = R"({ "model_name": "model", "id": "id", "outputs": [{ "name": "output", - "shape": [1, 4], + "shape": [2], "datatype": "BYTES", "parameters": { - "binary_data_size": 4 + "binary_data_size": 15 } }] })"; + ASSERT_TRUE(inferenceHeaderContentLength.has_value()); ASSERT_EQ(inferenceHeaderContentLength.value(), expectedJson.size()); - ASSERT_EQ(json.size(), expectedJson.size() + dataSize * sizeof(int8_t)); + ASSERT_EQ(json.size(), expectedJson.size() + dataSize); EXPECT_EQ(json.substr(0, inferenceHeaderContentLength.value()), expectedJson); for (int i = 0; i < dataSize; i++) { - EXPECT_EQ((int)json.substr(inferenceHeaderContentLength.value())[i], data[i]); + EXPECT_EQ((uint8_t)json.substr(inferenceHeaderContentLength.value())[i], data[i]); } } @@ -1201,15 +1212,48 @@ TEST_F(KFSMakeJsonFromPredictResponseValTest, MakeJsonFromPredictResponse_Positi KFSTensorOutputProto* bytes_val_proto = proto.add_outputs(); bytes_val_proto->set_name("bytes_val_proto"); bytes_val_proto->set_datatype("BYTES"); - int dataSize = 10; - bytes_val_proto->mutable_shape()->Add(dataSize); + bytes_val_proto->mutable_shape()->Add(2); auto bytes_val = bytes_val_proto->mutable_contents()->mutable_bytes_contents()->Add(); - // *bytes_val = i; - for (uint8_t i = 0; i < dataSize; i++) { - bytes_val->append((char*)&i, 1); - } + bytes_val->assign("string_1"); + bytes_val = bytes_val_proto->mutable_contents()->mutable_bytes_contents()->Add(); + bytes_val->assign("string_2"); ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength), StatusCode::OK); + std::string expectedJson = R"({ + "model_name": "model", + "id": "id", + "outputs": [{ + "name": "single_uint64_val", + "shape": [1], + "datatype": "UINT64", + "data": [5000000000] + }, { + "name": "two_uint32_vals", + "shape": [2], + "datatype": "UINT32", + "data": [4000000000, 1] + }, { + "name": "bytes_val_proto", + "shape": [2], + "datatype": "BYTES", + "data": ["string_1", "string_2"] + }] +})"; + ASSERT_EQ(json.size(), expectedJson.size()); + 
EXPECT_EQ(json, expectedJson); +} + +TEST_F(KFSMakeJsonFromPredictResponseValTest, MakeJsonFromPredictResponse_Positive_BYTES_binary) { + KFSTensorOutputProto* bytes_val_proto = proto.add_outputs(); + bytes_val_proto->set_name("bytes_val_proto"); + bytes_val_proto->set_datatype("BYTES"); + bytes_val_proto->mutable_shape()->Add(2); + auto bytes_val = bytes_val_proto->mutable_contents()->mutable_bytes_contents()->Add(); + bytes_val->assign("string_1"); + bytes_val = bytes_val_proto->mutable_contents()->mutable_bytes_contents()->Add(); + bytes_val->assign("string_2"); + + ASSERT_EQ(makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength, {"bytes_val_proto"}), StatusCode::OK); ASSERT_EQ(inferenceHeaderContentLength.has_value(), true); std::string expectedJson = R"({ "model_name": "model", @@ -1226,18 +1270,19 @@ TEST_F(KFSMakeJsonFromPredictResponseValTest, MakeJsonFromPredictResponse_Positi "data": [4000000000, 1] }, { "name": "bytes_val_proto", - "shape": [10], + "shape": [2], "datatype": "BYTES", "parameters": { - "binary_data_size": 10 + "binary_data_size": 24 } }] })"; + std::vector expectedBinaryData = {8, 0, 0, 0, 's', 't', 'r', 'i', 'n', 'g', '_', '1', 8, 0, 0, 0, 's', 't', 'r', 'i', 'n', 'g', '_', '2'}; ASSERT_EQ(inferenceHeaderContentLength.value(), expectedJson.size()); - ASSERT_EQ(json.size(), expectedJson.size() + dataSize * sizeof(int8_t)); + ASSERT_EQ(json.size(), expectedJson.size() + expectedBinaryData.size()); EXPECT_EQ(json.substr(0, inferenceHeaderContentLength.value()), expectedJson); - for (uint8_t i = 0; i < dataSize; i++) { - EXPECT_EQ((uint8_t)json.substr(inferenceHeaderContentLength.value())[i], i); + for (uint8_t i = 0; i < expectedBinaryData.size(); i++) { + EXPECT_EQ((uint8_t)json.substr(inferenceHeaderContentLength.value())[i], expectedBinaryData[i]); } } @@ -1434,3 +1479,61 @@ TEST_F(KFSMakeJsonFromPredictResponseValTest, MakeJsonFromPredictResponse_Option }] })"); } + +class KFSMakeJsonFromPredictResponseStringTest : public ::testing::Test { +protected: + KFSResponse proto; + std::string json; + KFSTensorOutputProto *string_output_1, *string_output_2; + std::optional inferenceHeaderContentLength; + + void SetUp() override { + proto.set_model_name("model"); + proto.set_id("id"); + + string_output_1 = proto.add_outputs(); + string_output_1->set_name("string_output_1"); + string_output_1->set_datatype("BYTES"); + string_output_1->mutable_shape()->Add(2); + string_output_1->mutable_contents()->mutable_bytes_contents()->Add()->assign("hello world"); + string_output_1->mutable_contents()->mutable_bytes_contents()->Add()->assign("welcome to kfs"); + + string_output_2 = proto.add_outputs(); + string_output_2->set_name("string_output_2_string"); + string_output_2->set_datatype("BYTES"); + string_output_2->mutable_shape()->Add(2); + string_output_2->mutable_contents()->mutable_bytes_contents()->Add()->assign("my 1 string"); + string_output_2->mutable_contents()->mutable_bytes_contents()->Add()->assign("my second string"); + } +}; + +TEST_F(KFSMakeJsonFromPredictResponseStringTest, Positive) { + auto status = makeJsonFromPredictResponse(proto, &json, inferenceHeaderContentLength, {"string_output_1"}); + ASSERT_EQ(status, StatusCode::OK) << status.string(); + + ASSERT_EQ(inferenceHeaderContentLength.has_value(), true); + std::string expectedJson = R"({ + "model_name": "model", + "id": "id", + "outputs": [{ + "name": "string_output_1", + "shape": [2], + "datatype": "BYTES", + "parameters": { + "binary_data_size": 33 + } + }, { + "name": 
"string_output_2_string", + "shape": [2], + "datatype": "BYTES", + "data": ["my 1 string", "my second string"] + }] +})"; + ASSERT_EQ(inferenceHeaderContentLength.value(), expectedJson.size()); + ASSERT_EQ(json.size(), expectedJson.size() + 33); + EXPECT_EQ(json.substr(0, inferenceHeaderContentLength.value()), expectedJson); + std::vector binaryData = { + 11, 0, 0, 0, 'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', + 14, 0, 0, 0, 'w', 'e', 'l', 'c', 'o', 'm', 'e', ' ', 't', 'o', ' ', 'k', 'f', 's'}; + EXPECT_EQ(std::memcmp(json.substr(inferenceHeaderContentLength.value()).data(), binaryData.data(), 33), 0); +} diff --git a/src/test/schema_test.cpp b/src/test/schema_test.cpp index 5f6a654384..a02d5b04c3 100644 --- a/src/test/schema_test.cpp +++ b/src/test/schema_test.cpp @@ -52,7 +52,7 @@ TEST(SchemaTest, PipelineConfigMatchingSchema) { rapidjson::Document pipelineConfigMatchingSchemaParsed; pipelineConfigMatchingSchemaParsed.Parse(pipelineConfigMatchingSchema); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -91,7 +91,7 @@ TEST(SchemaTest, PipelineConfigWithNegativeNodeVersion) { rapidjson::Document PipelineConfigWithNegativeNodeVersionParsed; PipelineConfigWithNegativeNodeVersionParsed.Parse(PipelineConfigWithNegativeNodeVersion); - auto result = ovms::validateJsonAgainstSchema(PipelineConfigWithNegativeNodeVersionParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(PipelineConfigWithNegativeNodeVersionParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -129,7 +129,7 @@ TEST(SchemaTest, PipelineConfigNameInvalidType) { rapidjson::Document pipelineConfigNameInvalidTypeParsed; pipelineConfigNameInvalidTypeParsed.Parse(pipelineConfigNameInvalidType); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNameInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNameInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -166,7 +166,7 @@ TEST(SchemaTest, PipelineConfigNodeOutputsInvalidType) { rapidjson::Document pipelineConfigNodeOutputsInvalidTypeParsed; pipelineConfigNodeOutputsInvalidTypeParsed.Parse(pipelineConfigNodeOutputsInvalidType); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeOutputsInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeOutputsInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -203,7 +203,7 @@ TEST(SchemaTest, PipelineConfigMissingName) { rapidjson::Document pipelineConfigMissingNameParsed; pipelineConfigMissingNameParsed.Parse(pipelineConfigMissingName); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -226,7 +226,7 @@ TEST(SchemaTest, PipelineConfigMissingNodes) { rapidjson::Document pipelineConfigMatchingSchemaParsed; pipelineConfigMatchingSchemaParsed.Parse(PipelineConfigMissingNodes); - auto result = 
ovms::validateJsonAgainstSchema(pipelineConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -263,7 +263,7 @@ TEST(SchemaTest, PipelineConfigMissingInputs) { rapidjson::Document pipelineConfigMissingInputsParsed; pipelineConfigMissingInputsParsed.Parse(pipelineConfigMissingInputs); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingInputsParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingInputsParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -296,7 +296,7 @@ TEST(SchemaTest, PipelineConfigMissingOutputs) { rapidjson::Document pipelineConfigMissingOutputsParsed; pipelineConfigMissingOutputsParsed.Parse(pipelineConfigMissingOutputs); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingOutputsParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigMissingOutputsParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -335,7 +335,7 @@ TEST(SchemaTest, PipelineConfigContainsNotAllowedKeys) { rapidjson::Document pipelineConfigContainsNotAllowedKeysParsed; pipelineConfigContainsNotAllowedKeysParsed.Parse(pipelineConfigContainsNotAllowedKeys); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigContainsNotAllowedKeysParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigContainsNotAllowedKeysParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -374,7 +374,7 @@ TEST(SchemaTest, PipelineConfigNodeContainsNotAllowedKeys) { rapidjson::Document pipelineConfigNodeContainsNotAllowedKeysParsed; pipelineConfigNodeContainsNotAllowedKeysParsed.Parse(pipelineConfigNodeContainsNotAllowedKeys); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeContainsNotAllowedKeysParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeContainsNotAllowedKeysParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -412,7 +412,7 @@ TEST(SchemaTest, PipelineConfigNodeTypeNotAllowed) { rapidjson::Document pipelineConfigNodeTypeNotAllowedParsed; pipelineConfigNodeTypeNotAllowedParsed.Parse(pipelineConfigNodeTypeNotAllowed); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeTypeNotAllowedParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeTypeNotAllowedParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -450,7 +450,7 @@ TEST(SchemaTest, PipelineConfigNodeOutputsInvalid) { rapidjson::Document pipelineConfigNodeOutputsInvalidParsed; pipelineConfigNodeOutputsInvalidParsed.Parse(pipelineConfigNodeOutputsInvalid); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeOutputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeOutputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -488,7 +488,7 @@ TEST(SchemaTest, PipelineConfigNodeInputsInvalid) { rapidjson::Document pipelineConfigNodeInputsInvalidParsed; pipelineConfigNodeInputsInvalidParsed.Parse(pipelineConfigNodeInputsInvalid); - auto 
result = ovms::validateJsonAgainstSchema(pipelineConfigNodeInputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeInputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -525,7 +525,7 @@ TEST(SchemaTest, PipelineConfigNodeInputsSourceNodeNameMissing) { rapidjson::Document pipelineConfigNodeInputsSourceNodeNameMissingParsed; pipelineConfigNodeInputsSourceNodeNameMissingParsed.Parse(pipelineConfigNodeInputsSourceNodeNameMissing); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeInputsSourceNodeNameMissingParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodeInputsSourceNodeNameMissingParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -562,7 +562,7 @@ TEST(SchemaTest, PipelineConfigOutputsSourceNodeNameMissing) { rapidjson::Document pipelineConfigOutputsSourceNodeNameMissingParsed; pipelineConfigOutputsSourceNodeNameMissingParsed.Parse(pipelineConfigOutputsSourceNodeNameMissing); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigOutputsSourceNodeNameMissingParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigOutputsSourceNodeNameMissingParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -602,7 +602,7 @@ TEST(SchemaTest, PipelineConfigNodesInputsInvalid) { rapidjson::Document pipelineConfigNodesInputsInvalidParsed; pipelineConfigNodesInputsInvalidParsed.Parse(pipelineConfigNodesInputsInvalid); - auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodesInputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(pipelineConfigNodesInputsInvalidParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -768,7 +768,7 @@ TEST(SchemaTest, ModelConfigNireqNegative) { rapidjson::Document modelConfigNireqNegativeParsed; modelConfigNireqNegativeParsed.Parse(modelConfigNireqNegative); - auto result = ovms::validateJsonAgainstSchema(modelConfigNireqNegativeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelConfigNireqNegativeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -788,7 +788,7 @@ TEST(SchemaTest, ModelConfigSequenceMaxNumberNegative) { rapidjson::Document modelConfigSeqNegativeDoc; modelConfigSeqNegativeDoc.Parse(modelConfigSeqNegative); - auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -808,7 +808,7 @@ TEST(SchemaTest, ModelConfigTimeoutNegative) { rapidjson::Document modelConfigSeqNegativeDoc; modelConfigSeqNegativeDoc.Parse(modelConfigTimeoutNegative); - auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -827,7 +827,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyAll) { })"; rapidjson::Document doc; doc.Parse(modelConfigVersionPolicyAll1); - auto result = ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA); + auto result = 
ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK) << modelConfigVersionPolicyAll1; const char* modelConfigVersionPolicyAll2 = R"( { @@ -843,7 +843,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyAll) { })"; rapidjson::Document doc2; doc2.Parse(modelConfigVersionPolicyAll2); - result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID) << modelConfigVersionPolicyAll2; const char* modelConfigVersionPolicyAll3 = R"( { @@ -859,7 +859,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyAll) { })"; rapidjson::Document doc3; doc2.Parse(modelConfigVersionPolicyAll3); - result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID) << modelConfigVersionPolicyAll3; } TEST(SchemaTest, ModelConfigVersionPolicyLatest) { @@ -877,7 +877,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyLatest) { })"; rapidjson::Document doc; doc.Parse(modelConfigVersionPolicyLatest1); - auto result = ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); const char* modelConfigVersionPolicyLatest2 = R"( { @@ -893,7 +893,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyLatest) { })"; rapidjson::Document doc2; doc2.Parse(modelConfigVersionPolicyLatest2); - result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); const char* modelConfigVersionPolicyLatest3 = R"( { @@ -909,7 +909,7 @@ TEST(SchemaTest, ModelConfigVersionPolicyLatest) { })"; rapidjson::Document doc3; doc3.Parse(modelConfigVersionPolicyLatest3); - result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } TEST(SchemaTest, ModelConfigVersionPolicySpecific) { @@ -927,7 +927,7 @@ TEST(SchemaTest, ModelConfigVersionPolicySpecific) { })"; rapidjson::Document doc1; doc1.Parse(modelConfigVersionPolicySpecific1); - auto result = ovms::validateJsonAgainstSchema(doc1, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(doc1, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); const char* modelConfigVersionPolicySpecific2 = R"( { @@ -943,7 +943,7 @@ TEST(SchemaTest, ModelConfigVersionPolicySpecific) { })"; rapidjson::Document doc2; doc2.Parse(modelConfigVersionPolicySpecific2); - result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); const char* modelConfigVersionPolicySpecific3 = R"( { @@ -959,7 +959,7 @@ TEST(SchemaTest, ModelConfigVersionPolicySpecific) { })"; rapidjson::Document doc3; doc3.Parse(modelConfigVersionPolicySpecific3); - result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -979,7 +979,7 @@ 
TEST(SchemaTest, ModelConfigPluginConfigPositive) { rapidjson::Document modelConfigSeqNegativeDoc; modelConfigSeqNegativeDoc.Parse(modelConfigTimeoutNegative); - auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelConfigSeqNegativeDoc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1000,7 +1000,7 @@ TEST(SchemaTest, ModelConfigPluginConfigLayoutShapeNegative) { rapidjson::Document doc1; doc1.Parse(config1); - auto result = ovms::validateJsonAgainstSchema(doc1, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(doc1, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID) << config1; const char* config2 = R"( { @@ -1018,7 +1018,7 @@ TEST(SchemaTest, ModelConfigPluginConfigLayoutShapeNegative) { rapidjson::Document doc2; doc2.Parse(config2); - result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID) << config2; const char* config3 = R"( { @@ -1036,7 +1036,7 @@ TEST(SchemaTest, ModelConfigPluginConfigLayoutShapeNegative) { rapidjson::Document doc3; doc3.Parse(config3); - result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc3, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID) << config3; } TEST(SchemaTest, ModelConfigPluginConfigNegative) { @@ -1055,7 +1055,7 @@ TEST(SchemaTest, ModelConfigPluginConfigNegative) { rapidjson::Document doc; doc.Parse(modelConfigNegative); - auto result = ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(doc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); const char* modelConfigNegative2 = R"( { @@ -1072,7 +1072,7 @@ TEST(SchemaTest, ModelConfigPluginConfigNegative) { rapidjson::Document doc2; doc2.Parse(modelConfigNegative2); - result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA); + result = ovms::validateJsonAgainstSchema(doc2, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1154,7 +1154,7 @@ TEST(SchemaTest, CustomNodeLibraryConfigMatchingSchema) { rapidjson::Document customNodeLibraryConfigParsed; customNodeLibraryConfigParsed.Parse(customNodeLibraryConfig); - auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1181,7 +1181,7 @@ TEST(SchemaTest, CustomNodeLibraryConfigMissingLibraryName) { rapidjson::Document customNodeLibraryConfigMissingLibraryNameParsed; customNodeLibraryConfigMissingLibraryNameParsed.Parse(customNodeLibraryConfigMissingLibraryName); - auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigMissingLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigMissingLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1208,7 +1208,7 @@ TEST(SchemaTest, CustomNodeLibraryConfigMissingBasePath) { rapidjson::Document customNodeLibraryConfigMissingBasePathParsed; 
customNodeLibraryConfigMissingBasePathParsed.Parse(customNodeLibraryConfigMissingBasePath); - auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigMissingBasePathParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigMissingBasePathParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1236,7 +1236,7 @@ TEST(SchemaTest, CustomNodeLibraryConfigInvalidNameType) { rapidjson::Document customNodeLibraryConfigInvalidNameTypeParsed; customNodeLibraryConfigInvalidNameTypeParsed.Parse(customNodeLibraryConfigInvalidNameType); - auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigInvalidNameTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigInvalidNameTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1264,7 +1264,7 @@ TEST(SchemaTest, CustomNodeLibraryConfigInvalidBasePathType) { rapidjson::Document customNodeLibraryConfigInvalidBasePathTypeParsed; customNodeLibraryConfigInvalidBasePathTypeParsed.Parse(customNodeLibraryConfigInvalidBasePathType); - auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigInvalidBasePathTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeLibraryConfigInvalidBasePathTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1316,7 +1316,7 @@ TEST(SchemaTest, CustomNodeConfigInvalidLibraryNameType) { rapidjson::Document customNodeConfigInvalidLibraryNameTypeParsed; customNodeConfigInvalidLibraryNameTypeParsed.Parse(customNodeConfigInvalidLibraryNameType); - auto result = ovms::validateJsonAgainstSchema(customNodeConfigInvalidLibraryNameTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeConfigInvalidLibraryNameTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1367,7 +1367,7 @@ TEST(SchemaTest, CustomNodeConfigNoLibraryName) { rapidjson::Document customNodeConfigNoLibraryNameParsed; customNodeConfigNoLibraryNameParsed.Parse(customNodeConfigNoLibraryName); - auto result = ovms::validateJsonAgainstSchema(customNodeConfigNoLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeConfigNoLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1420,7 +1420,7 @@ TEST(SchemaTest, CustomNodeConfigModelNameShouldNotBeAcceptedInCustomNode) { rapidjson::Document customNodeConfigModelNameParsed; customNodeConfigModelNameParsed.Parse(customNodeConfigModelName); - auto result = ovms::validateJsonAgainstSchema(customNodeConfigModelNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeConfigModelNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1479,7 +1479,7 @@ TEST(SchemaTest, CustomNodeConfigNotAppropiateParameterShouldNotBeAcceptedInCust rapidjson::Document customNodeConfigNotAppropiateParameterParsed; customNodeConfigNotAppropiateParameterParsed.Parse(customNodeConfigNotAppropiateParameter); - auto result = ovms::validateJsonAgainstSchema(customNodeConfigNotAppropiateParameterParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeConfigNotAppropiateParameterParsed, 
ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1538,7 +1538,7 @@ TEST(SchemaTest, ModelNodeConfigLibraryNameShouldNotBeAcceptedInDLNode) { rapidjson::Document modelNodeConfigLibraryNameParsed; modelNodeConfigLibraryNameParsed.Parse(modelNodeConfigLibraryName); - auto result = ovms::validateJsonAgainstSchema(modelNodeConfigLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelNodeConfigLibraryNameParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1597,7 +1597,7 @@ TEST(SchemaTest, ModelNodeConfigNotAppropiateParameterShouldNotBeAcceptedInDLNod rapidjson::Document modelNodeConfigNotAppropiateParameterParsed; modelNodeConfigNotAppropiateParameterParsed.Parse(modelNodeConfigNotAppropiateParameter); - auto result = ovms::validateJsonAgainstSchema(modelNodeConfigNotAppropiateParameterParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(modelNodeConfigNotAppropiateParameterParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1655,7 +1655,7 @@ TEST(SchemaTest, CustomNodeConfigParamsInvalidType) { rapidjson::Document customNodeConfigParamsInvalidTypeParsed; customNodeConfigParamsInvalidTypeParsed.Parse(customNodeConfigParamsInvalidType); - auto result = ovms::validateJsonAgainstSchema(customNodeConfigParamsInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(customNodeConfigParamsInvalidTypeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1715,7 +1715,7 @@ static const char* demultiplexerConfig = R"( TEST(SchemaTest, DemultiplexerConfigMatchingSchema) { rapidjson::Document demultiplexerConfigMatchingSchemaParsed; demultiplexerConfigMatchingSchemaParsed.Parse(demultiplexerConfig); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigMatchingSchemaParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1726,7 +1726,7 @@ TEST(SchemaTest, DemultiplexerConfigDemultiplyNegativeOneAllowed) { config.replace(config.find(demultiplyCountToReplace), demultiplyCountToReplace.size(), demultiplyCount); rapidjson::Document demultiplexerConfigDemultiplyCountNegativeParsed; demultiplexerConfigDemultiplyCountNegativeParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountNegativeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountNegativeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1737,7 +1737,7 @@ TEST(SchemaTest, DemultiplexerConfigDemultiplyCountNegativeLowerThanNegativeOneN config.replace(config.find(demultiplyCountToReplace), demultiplyCountToReplace.size(), demultiplyCount); rapidjson::Document demultiplexerConfigDemultiplyCountNegativeParsed; demultiplexerConfigDemultiplyCountNegativeParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountNegativeParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountNegativeParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1749,7 +1749,7 @@ TEST(SchemaTest, 
DemultiplexerConfigDemultiplyCountEqualsZeroAllowed) { config.replace(config.find(demultiplyCountToReplace), demultiplyCountToReplace.size(), demultiplyCount); rapidjson::Document demultiplexerConfigDemultiplyCountEqualsZeroParsed; demultiplexerConfigDemultiplyCountEqualsZeroParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountEqualsZeroParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountEqualsZeroParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1760,7 +1760,7 @@ TEST(SchemaTest, DemultiplexerConfigDemultiplyCountEqualsOneAllowed) { config.replace(config.find(demultiplyCountToReplace), demultiplyCountToReplace.size(), demultiplyCount); rapidjson::Document demultiplexerConfigDemultiplyCountEqualsOneParsed; demultiplexerConfigDemultiplyCountEqualsOneParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountEqualsOneParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountEqualsOneParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::OK); } @@ -1771,7 +1771,7 @@ TEST(SchemaTest, DemultiplexerConfigDemultiplyCountTypeInvalid) { config.replace(config.find(demultiplyCountToReplace), demultiplyCountToReplace.size(), demultiplyCount); rapidjson::Document demultiplexerConfigDemultiplyCountTypeInvalidParsed; demultiplexerConfigDemultiplyCountTypeInvalidParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountTypeInvalidParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigDemultiplyCountTypeInvalidParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); } @@ -1782,6 +1782,45 @@ TEST(SchemaTest, DemultiplexerConfigGatherFromNodeTypeInvalid) { config.replace(config.find(gatherFromNodeToReplace), gatherFromNodeToReplace.size(), gatherFromNode); rapidjson::Document demultiplexerConfigGatherFromNodeTypeInvalidParsed; demultiplexerConfigGatherFromNodeTypeInvalidParsed.Parse(config.c_str()); - auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigGatherFromNodeTypeInvalidParsed, ovms::MODELS_CONFIG_SCHEMA); + auto result = ovms::validateJsonAgainstSchema(demultiplexerConfigGatherFromNodeTypeInvalidParsed, ovms::MODELS_CONFIG_SCHEMA.c_str()); + EXPECT_EQ(result, ovms::StatusCode::JSON_INVALID); +} + +#if (MEDIAPIPE_DISABLE == 0) +TEST(SchemaTest, MediapipeConfigPositive) { + const char* mediapipeConfigPositive = R"( + { + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "dummy_model", + "graph_path": "dummy_path" + } + ] + })"; + + rapidjson::Document configDoc; + configDoc.Parse(mediapipeConfigPositive); + auto result = ovms::validateJsonAgainstSchema(configDoc, ovms::MODELS_CONFIG_SCHEMA.c_str()); + EXPECT_EQ(result, ovms::StatusCode::OK); +} +#endif + +TEST(SchemaTest, MediapipeConfigNegativeAdditionalMediapipeConfigField) { + const char* mediapipeConfigNegative = R"( + { + "model_config_list": [], + "mediapipe_config_list": [ + { + "name": "dummy_model", + "graph_path": "dummy_path", + "someField": "ovms_rules" + } + ] + })"; + + rapidjson::Document configDoc; + configDoc.Parse(mediapipeConfigNegative); + auto result = ovms::validateJsonAgainstSchema(configDoc, ovms::MODELS_CONFIG_SCHEMA.c_str()); EXPECT_EQ(result, 
ovms::StatusCode::JSON_INVALID); } diff --git a/src/test/serialization_tests.cpp b/src/test/serialization_tests.cpp index f5c339fc88..d3017d8b3c 100644 --- a/src/test/serialization_tests.cpp +++ b/src/test/serialization_tests.cpp @@ -36,8 +36,6 @@ #include "../tfs_frontend/tfs_utils.hpp" #include "test_utils.hpp" -#include - using TFTensorProto = tensorflow::TensorProto; using TFPredictRequest = tensorflow::serving::PredictRequest; @@ -202,10 +200,10 @@ class TensorflowGRPCPredict : public ::testing::TestWithParam { class SerializeTFTensorProto : public TensorflowGRPCPredict { public: std::tuple< - std::shared_ptr, + std::shared_ptr, ov::Tensor> getInputs(ovms::Precision precision) { - std::shared_ptr servableOutput = + std::shared_ptr servableOutput = std::make_shared( std::string("2_values_C_layout"), precision, @@ -282,7 +280,7 @@ TEST(SerializeTFGRPCPredictResponse, ShouldSuccessForSupportedPrecision) { ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10}, @@ -416,10 +414,10 @@ TEST_F(KFServingGRPCPredict, NegativeMismatchBetweenTensorInfoAndTensorShape) { class SerializeKFSInferOutputTensor : public KFServingGRPCPredict { public: std::tuple< - std::shared_ptr, + std::shared_ptr, ov::Tensor> getInputs(ovms::Precision precision) { - std::shared_ptr servableOutput = + std::shared_ptr servableOutput = std::make_shared( std::string("2_values_C_layout"), precision, @@ -506,7 +504,7 @@ TEST(SerializeKFSGRPCPredictResponse, ShouldSuccessForSupportedPrecision) { ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10}, @@ -531,7 +529,7 @@ TEST(SerializeKFSGRPCPredictResponse, ShouldSuccessForSupportedPrecisionWithuseS ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_INPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10}, @@ -557,7 +555,7 @@ TEST(SerializeKFSGRPCPredictResponse, ShouldSuccessForSupportedPrecisionWithshar ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_INPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 10}, @@ -594,11 +592,11 @@ INSTANTIATE_TEST_SUITE_P( // C-API -class CApiSerialization : public ::testing::TestWithParam { +class CAPISerialization : public ::testing::TestWithParam { protected: tensor_map_t prepareInputs(ovms::Precision precision, ovms::Shape shape = ovms::Shape{1, 10}) { tensor_map_t ret; - std::shared_ptr servableOutput = + std::shared_ptr servableOutput = std::make_shared(std::string(DUMMY_MODEL_OUTPUT_NAME), precision, shape, Layout{"NC"}); ret[DUMMY_MODEL_OUTPUT_NAME] = servableOutput; return ret; @@ -606,14 +604,14 @@ class CApiSerialization : public 
::testing::TestWithParam { InferenceResponse response{"dummy", 1}; }; -TEST(SerializeCApiTensorSingle, NegativeMismatchBetweenTensorInfoAndTensorPrecision) { +TEST(SerializeCAPITensorSingle, NegativeMismatchBetweenTensorInfoAndTensorPrecision) { InferenceResponse response{"dummy", 1}; ov::Core ieCore; std::shared_ptr model = ieCore.read_model(std::filesystem::current_path().u8string() + "/src/test/dummy/1/dummy.xml"); ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::I32, // wrong precision ovms::Shape{1, 10}, @@ -628,14 +626,14 @@ TEST(SerializeCApiTensorSingle, NegativeMismatchBetweenTensorInfoAndTensorPrecis EXPECT_EQ(status.getCode(), ovms::StatusCode::INTERNAL_ERROR); } -TEST(SerializeCApiTensorSingle, NegativeMismatchBetweenTensorInfoAndTensorShape) { +TEST(SerializeCAPITensorSingle, NegativeMismatchBetweenTensorInfoAndTensorShape) { InferenceResponse response{"dummy", 1}; ov::Core ieCore; std::shared_ptr model = ieCore.read_model(std::filesystem::current_path().u8string() + "/src/test/dummy/1/dummy.xml"); ov::CompiledModel compiledModel = ieCore.compile_model(model, "CPU"); ov::InferRequest inferRequest = compiledModel.create_infer_request(); ovms::tensor_map_t tenMap; - std::shared_ptr tensorInfo = std::make_shared( + std::shared_ptr tensorInfo = std::make_shared( DUMMY_MODEL_OUTPUT_NAME, ovms::Precision::FP32, ovms::Shape{1, 8}, // wrong shape @@ -650,7 +648,7 @@ TEST(SerializeCApiTensorSingle, NegativeMismatchBetweenTensorInfoAndTensorShape) EXPECT_EQ(status.getCode(), ovms::StatusCode::INTERNAL_ERROR); } -class SerializeCApiTensorPositive : public CApiSerialization {}; +class SerializeCAPITensorPositive : public CAPISerialization {}; struct MockedTensorProvider { ov::Tensor& tensor; @@ -663,7 +661,7 @@ Status OutputGetter::get(const std::string& name, ov::Ten return StatusCode::OK; } -TEST_P(SerializeCApiTensorPositive, SerializeTensorShouldSucceedForPrecision) { +TEST_P(SerializeCAPITensorPositive, SerializeTensorShouldSucceedForPrecision) { ovms::Precision testedPrecision = GetParam(); ov::Tensor tensor(ovmsPrecisionToIE2Precision(testedPrecision), ov::Shape{1, 10}); MockedTensorProvider provider(tensor); @@ -684,15 +682,15 @@ TEST_P(SerializeCApiTensorPositive, SerializeTensorShouldSucceedForPrecision) { INSTANTIATE_TEST_SUITE_P( Test, - SerializeCApiTensorPositive, + SerializeCAPITensorPositive, ::testing::ValuesIn(SUPPORTED_CAPI_OUTPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return toString(info.param); }); -class SerializeCApiTensorNegative : public CApiSerialization {}; +class SerializeCAPITensorNegative : public CAPISerialization {}; -TEST_P(SerializeCApiTensorNegative, SerializeTensorShouldFailForPrecision) { +TEST_P(SerializeCAPITensorNegative, SerializeTensorShouldFailForPrecision) { ovms::Precision testedPrecision = GetParam(); ov::Tensor tensor(ovmsPrecisionToIE2Precision(testedPrecision), ov::Shape{1, 10}); MockedTensorProvider provider(tensor); @@ -713,13 +711,13 @@ TEST_P(SerializeCApiTensorNegative, SerializeTensorShouldFailForPrecision) { INSTANTIATE_TEST_SUITE_P( Test, - SerializeCApiTensorNegative, + SerializeCAPITensorNegative, ::testing::ValuesIn(UNSUPPORTED_CAPI_OUTPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const 
::testing::TestParamInfo& info) { return toString(info.param); }); -TEST_F(CApiSerialization, ValidSerialization) { +TEST_F(CAPISerialization, ValidSerialization) { constexpr size_t NUMBER_OF_ELEMENTS = 3; std::array data = {3.0, 2.0, 1.0}; shape_t shape{1, NUMBER_OF_ELEMENTS, 1, 1}; @@ -754,3 +752,63 @@ TEST_F(CApiSerialization, ValidSerialization) { EXPECT_EQ(buffer->getByteSize(), tensor.get_byte_size()); EXPECT_EQ(std::memcmp(tensor.data(), buffer->data(), sizeof(float) * NUMBER_OF_ELEMENTS), 0); } + +template +class SerializeString : public ::testing::Test { +public: + T response; +}; + +using MyTypes = ::testing::Types; +TYPED_TEST_SUITE(SerializeString, MyTypes); + +// Serialization to string due to suffix _string in mapping +TYPED_TEST(SerializeString, Valid_2D_U8_String) { + std::vector data = { + 'S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, + 'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + ov::Tensor tensor(ov::element::u8, ov::Shape{3, 11}, data.data()); + MockedTensorProvider provider(tensor); + OutputGetter outputGetter(provider); + + ovms::tensor_map_t infos; + infos["out_string"] = std::make_shared("out", "out_string", ovms::Precision::U8, ovms::Shape{-1, -1}, Layout{"N..."}); + + bool useSharedOutputContent = true; + ASSERT_EQ(serializePredictResponse(outputGetter, + UNUSED_NAME, + UNUSED_VERSION, + infos, + &this->response, + getTensorInfoName, + useSharedOutputContent), + ovms::StatusCode::OK); + assertStringResponse(this->response, {"String_123", "zebra", ""}, "out_string"); +} + +// Serialization to U8 due to missing suffix _string in mapping +TYPED_TEST(SerializeString, Valid_2D_U8_NonString) { + std::vector data = { + 'S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, + 'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + ov::Tensor tensor(ov::element::u8, ov::Shape{3, 11}, data.data()); + MockedTensorProvider provider(tensor); + OutputGetter outputGetter(provider); + + ovms::tensor_map_t infos; + infos["out_string"] = std::make_shared("out", "out", ovms::Precision::U8, ovms::Shape{-1, -1}, Layout{"N..."}); + + bool useSharedOutputContent = false; // TODO: support raw field + ASSERT_EQ(serializePredictResponse(outputGetter, + UNUSED_NAME, + UNUSED_VERSION, + infos, + &this->response, + getTensorInfoName, + useSharedOutputContent), + ovms::StatusCode::OK); + bool checkRaw = false; // raw not supported + checkIncrement4DimResponse("out", data, this->response, std::vector{3, 11}, checkRaw); +} diff --git a/src/test/server_test.cpp b/src/test/server_test.cpp index af594d8886..746d2d549f 100644 --- a/src/test/server_test.cpp +++ b/src/test/server_test.cpp @@ -111,14 +111,15 @@ class ServingClient { } }; -void requestServerAlive(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = true) { +static void requestServerAlive(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = true) { grpc::ChannelArguments args; std::string address = std::string("localhost") + ":" + grpcPort; SPDLOG_INFO("Verifying if server is live on address: {}", address); ServingClient client(grpc::CreateCustomChannel(address, grpc::InsecureChannelCredentials(), args)); client.verifyLive(status, expectedStatus); } -void requestServerReady(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = true) { + +static void requestServerReady(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = 
true) { grpc::ChannelArguments args; std::string address = std::string("localhost") + ":" + grpcPort; SPDLOG_INFO("Veryfying if server is ready on address: {}", address); @@ -126,7 +127,7 @@ void requestServerReady(const char* grpcPort, grpc::StatusCode status = grpc::St client.verifyReady(status, expectedStatus); } -void requestModelReady(const char* grpcPort, const std::string& modelName, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = true) { +static void requestModelReady(const char* grpcPort, const std::string& modelName, grpc::StatusCode status = grpc::StatusCode::OK, bool expectedStatus = true) { grpc::ChannelArguments args; std::string address = std::string("localhost") + ":" + grpcPort; SPDLOG_INFO("Verifying if server is ready on address: {}", address); @@ -134,7 +135,7 @@ void requestModelReady(const char* grpcPort, const std::string& modelName, grpc: client.verifyModelReady(modelName, status, expectedStatus); } -void checkServerMetadata(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK) { +static void checkServerMetadata(const char* grpcPort, grpc::StatusCode status = grpc::StatusCode::OK) { grpc::ChannelArguments args; std::string address = std::string("localhost") + ":" + grpcPort; SPDLOG_INFO("Verifying if server responds with correct metadata on address: {}", address); @@ -304,7 +305,7 @@ TEST(Server, ServerMetadata) { }); auto start = std::chrono::high_resolution_clock::now(); while ((ovms::Server::instance().getModuleState("GRPCServerModule") != ovms::ModuleState::INITIALIZED) && - (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < 1)) { + (std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count() < 5)) { } grpc::ChannelArguments args; diff --git a/src/test/stateful_modelinstance_test.cpp b/src/test/stateful_modelinstance_test.cpp index d29303fe0a..af75724c94 100644 --- a/src/test/stateful_modelinstance_test.cpp +++ b/src/test/stateful_modelinstance_test.cpp @@ -98,8 +98,8 @@ class StatefulModelInstanceTempDir : public TestWithTempDir { std::string dummyModelName; ovms::model_version_t modelVersion; inputs_info_t modelInput; - std::pair> sequenceId; - std::pair> sequenceControlStart; + std::pair> sequenceId; + std::pair> sequenceControlStart; std::unique_ptr ieCore; void SetUpConfig(const std::string& configContent) { @@ -118,7 +118,7 @@ class StatefulModelInstanceTempDir : public TestWithTempDir { SetUpConfig(modelStatefulConfig); std::filesystem::copy("/ovms/src/test/dummy", modelPath, std::filesystem::copy_options::recursive); modelInput = {{DUMMY_MODEL_INPUT_NAME, - std::tuple{{1, 10}, ovms::Precision::FP32}}}; + std::tuple{{1, 10}, ovms::Precision::FP32}}}; } void TearDown() override { @@ -130,8 +130,8 @@ class StatefulModelInstanceTempDir : public TestWithTempDir { class StatefulModelInstanceInputValidation : public ::testing::Test { public: inputs_info_t modelInput; - std::pair> sequenceId; - std::pair> sequenceControlStart; + std::pair> sequenceId; + std::pair> sequenceControlStart; std::unique_ptr ieCore; void SetUp() override { diff --git a/src/test/stateful_test_utils.hpp b/src/test/stateful_test_utils.hpp index e9f82d343b..ca81b969c2 100644 --- a/src/test/stateful_test_utils.hpp +++ b/src/test/stateful_test_utils.hpp @@ -32,8 +32,6 @@ #include "tensorflow_serving/apis/prediction_service.grpc.pb.h" #pragma GCC diagnostic pop -#include - const std::string SEQUENCE_ID_INPUT = "sequence_id"; const std::string SEQUENCE_CONTROL_INPUT = "sequence_control_input"; 
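The ServerMetadata test above now waits up to 5 seconds instead of 1 for GRPCServerModule to reach INITIALIZED, but it still spins in a bare busy loop. A minimal sketch of a deadline-based wait that such tests could share is shown below; waitUntil, its default timeout, and the 10 ms sleep are illustrative assumptions, not part of this patch.

#include <chrono>
#include <thread>

// Hypothetical helper (not in the patch): poll a condition until it holds or the deadline passes.
template <typename Predicate>
static bool waitUntil(Predicate ready, std::chrono::seconds timeout = std::chrono::seconds(5)) {
    const auto start = std::chrono::high_resolution_clock::now();
    while (!ready()) {
        if (std::chrono::high_resolution_clock::now() - start >= timeout)
            return false;  // deadline exceeded; the caller decides how to fail the test
        std::this_thread::sleep_for(std::chrono::milliseconds(10));  // avoid burning a core while waiting
    }
    return true;
}

// Usage mirroring the test: wait for the gRPC module before issuing the metadata request.
// bool up = waitUntil([] {
//     return ovms::Server::instance().getModuleState("GRPCServerModule") == ovms::ModuleState::INITIALIZED;
// });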
diff --git a/src/test/status_test.cpp b/src/test/status_test.cpp index 89396f6925..0d846a17d6 100644 --- a/src/test/status_test.cpp +++ b/src/test/status_test.cpp @@ -25,7 +25,7 @@ #include "test_utils.hpp" namespace ovms { -StatusCode& operator++(StatusCode& statusCode) { +static StatusCode& operator++(StatusCode& statusCode) { if (statusCode == StatusCode::STATUS_CODE_END) { throw std::out_of_range("for E& operator ++ (E&)"); } @@ -40,7 +40,7 @@ TEST(StatusCodeTest, AllStatusCodesMapped) { } } -TEST(StatusCodeTest, CApi) { +TEST(StatusCodeTest, CAPI) { for (auto statusCode = StatusCode::OK; statusCode != StatusCode::STATUS_CODE_END; ++statusCode) { Status status = Status(statusCode); OVMS_Status* sts = reinterpret_cast(&status); diff --git a/src/test/systeminfo_test.cpp b/src/test/systeminfo_test.cpp index 2b919e1771..a8b2922630 100644 --- a/src/test/systeminfo_test.cpp +++ b/src/test/systeminfo_test.cpp @@ -22,63 +22,11 @@ #include "../status.hpp" #include "../systeminfo.hpp" -#include "../systeminfo_impl.hpp" using namespace testing; using ovms::getCoreCount; -using ovms::getCoreCountImpl; -using ovms::getCPUSetFile; using ovms::StatusCode; -TEST(SystemInfoImpl, getCoreCountImplPositive) { - uint16_t coreCount = 42; - EXPECT_EQ(getCoreCountImpl("1", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 1); - EXPECT_EQ(getCoreCountImpl("3", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 1); - - EXPECT_EQ(getCoreCountImpl("0-1", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 2); - EXPECT_EQ(getCoreCountImpl("1-3", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 3); - - EXPECT_EQ(getCoreCountImpl("0,2-4", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 4); - EXPECT_EQ(getCoreCountImpl("2-4,9", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 4); - - EXPECT_EQ(getCoreCountImpl("2-4,9-12", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 7); - EXPECT_EQ(getCoreCountImpl("2-4,9-12,123-125", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 10); - - EXPECT_EQ(getCoreCountImpl("3,8,124,1096,1098", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 5); - EXPECT_EQ(getCoreCountImpl("3,8,124,1096,1098,1099-1101", coreCount), StatusCode::OK); - EXPECT_EQ(coreCount, 8); - uint64_t nprocs = sysconf(_SC_NPROCESSORS_ONLN); - EXPECT_EQ(nprocs, getCoreCount()) << nprocs; -} -TEST(SystemInfoImpl, getCoreCountImplNegative) { - uint16_t coreCount = 42; - EXPECT_EQ(getCoreCountImpl("-1", coreCount), StatusCode::FILESYSTEM_ERROR); - EXPECT_EQ(coreCount, 42); - EXPECT_EQ(getCoreCountImpl("-33", coreCount), StatusCode::FILESYSTEM_ERROR); - EXPECT_EQ(coreCount, 42); - - EXPECT_EQ(getCoreCountImpl("35-33", coreCount), StatusCode::FILESYSTEM_ERROR); - EXPECT_EQ(coreCount, 42); - - EXPECT_EQ(getCoreCountImpl("33-35-37", coreCount), StatusCode::FILESYSTEM_ERROR); - EXPECT_EQ(coreCount, 42); - - EXPECT_EQ(getCoreCountImpl("1234567890123456789012345678901234567890", coreCount), StatusCode::FILESYSTEM_ERROR); - EXPECT_EQ(coreCount, 42); - // TODO test against maximum values - std::ifstream ifs; - EXPECT_EQ(getCPUSetFile(ifs, "/sys/fs/illegal_file"), StatusCode::FILESYSTEM_ERROR); -} - TEST(SystemInfo, getCoreCount) { uint16_t cpuCount = getCoreCount(); EXPECT_GE(cpuCount, 1); diff --git a/src/test/binaryutils_test.cpp b/src/test/tensor_conversion_test.cpp similarity index 51% rename from src/test/binaryutils_test.cpp rename to src/test/tensor_conversion_test.cpp index 427f4ff860..47e5b13c61 100644 --- a/src/test/binaryutils_test.cpp +++ b/src/test/tensor_conversion_test.cpp @@ -19,7 +19,7 @@ 
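The SystemInfoImpl tests deleted above exercised parsing of Linux cpuset strings such as "0,2-4,9-12" into a core count. For reference, a standalone sketch with the same accept/reject behaviour as those removed expectations; parseCpuSet is an illustrative stand-in, not the getCoreCountImpl implementation the server actually uses.

#include <cstdint>
#include <sstream>
#include <string>

// Returns false (leaving coreCount untouched) for the inputs the removed negative tests rejected.
static bool parseCpuSet(const std::string& cpuset, uint16_t& coreCount) {
    uint64_t total = 0;
    std::stringstream ss(cpuset);
    std::string range;
    while (std::getline(ss, range, ',')) {
        const size_t dash = range.find('-');
        try {
            if (dash == std::string::npos) {
                (void)std::stoul(range);  // single core, e.g. "3"
                total += 1;
            } else {
                const uint64_t lo = std::stoul(range.substr(0, dash));
                const uint64_t hi = std::stoul(range.substr(dash + 1));
                if (hi < lo || range.find('-', dash + 1) != std::string::npos)
                    return false;  // rejects "35-33" and "33-35-37", as the old tests did
                total += hi - lo + 1;
            }
        } catch (const std::exception&) {
            return false;  // rejects "-1" and overlong numbers, as the old tests did
        }
    }
    coreCount = static_cast<uint16_t>(total);
    return true;
}
// parseCpuSet("2-4,9-12", n) yields n == 7, matching one of the removed expectations.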
#include #include -#include "../binaryutils.hpp" +#include "../tensor_conversion.hpp" #include "opencv2/opencv.hpp" #include "test_utils.hpp" @@ -27,7 +27,7 @@ using namespace ovms; namespace { template -class BinaryUtilsTest : public ::testing::Test { +class NativeFileInputConversionTest : public ::testing::Test { public: TensorType requestTensor; void SetUp() override { @@ -74,80 +74,79 @@ class BinaryUtilsTest : public ::testing::Test { }; using MyTypes = ::testing::Types; -TYPED_TEST_SUITE(BinaryUtilsTest, MyTypes); +TYPED_TEST_SUITE(NativeFileInputConversionTest, MyTypes); -TYPED_TEST(BinaryUtilsTest, tensorWithNonMatchingBatchsize) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithNonMatchingBatchsize) { ov::Tensor tensor; - auto tensorInfo = std::make_shared(); - tensorInfo->setShape({5, 1, 1, 1}); - tensorInfo->setLayout(Layout{"NHWC"}); + auto tensorInfo = std::make_shared( + "", ovms::Precision::U8, ovms::Shape{5, 1, 1, 1}, Layout{"NHWC"}); EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::INVALID_BATCH_SIZE); } -TYPED_TEST(BinaryUtilsTest, tensorWithInvalidImage) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithInvalidImage) { TypeParam requestTensorInvalidImage; std::string invalidImage = "INVALID IMAGE"; this->prepareBinaryTensor(requestTensorInvalidImage, invalidImage); ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensorInvalidImage, tensor, tensorInfo, nullptr), ovms::StatusCode::IMAGE_PARSING_FAILED); } -TYPED_TEST(BinaryUtilsTest, tensorWithEmptyTensor) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithEmptyTensor) { TypeParam requestTensorEmptyInput; std::string emptyInput = ""; this->prepareBinaryTensor(requestTensorEmptyInput, emptyInput); ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); if (std::is_same::value) EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensorEmptyInput, tensor, tensorInfo, nullptr), ovms::StatusCode::STRING_VAL_EMPTY); else EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensorEmptyInput, tensor, tensorInfo, nullptr), StatusCode::BYTES_CONTENTS_EMPTY); } -TYPED_TEST(BinaryUtilsTest, tensorWithNonSupportedLayout) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithNonSupportedLayout) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NCHW"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NCHW"}); EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::UNSUPPORTED_LAYOUT); } -TYPED_TEST(BinaryUtilsTest, tensorWithNonSupportedPrecision) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithNonSupportedPrecision) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::MIXED, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::MIXED, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); 
EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::INVALID_PRECISION); } -TYPED_TEST(BinaryUtilsTest, tensorWithNonMatchingShapeSize) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithNonMatchingShapeSize) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1}, Layout{"NC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1}, Layout{"NC"}); EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::UNSUPPORTED_LAYOUT); } -TYPED_TEST(BinaryUtilsTest, tensorWithNonMatchingNumberOfChannelsNHWC) { +TYPED_TEST(NativeFileInputConversionTest, tensorWithNonMatchingNumberOfChannelsNHWC) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 1}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 1}, Layout{"NHWC"}); EXPECT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::INVALID_NO_OF_CHANNELS); } -TYPED_TEST(BinaryUtilsTest, positive_rgb) { +TYPED_TEST(NativeFileInputConversionTest, positive_rgb) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 3); @@ -155,7 +154,7 @@ TYPED_TEST(BinaryUtilsTest, positive_rgb) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_grayscale) { +TYPED_TEST(NativeFileInputConversionTest, positive_grayscale) { uint8_t grayscale_expected_tensor[] = {0x00}; std::ifstream DataFile; @@ -170,7 +169,7 @@ TYPED_TEST(BinaryUtilsTest, positive_grayscale) { ov::Tensor tensor; this->prepareBinaryTensor(grayscaleRequestTensor, grayscale_image_bytes, grayscale_filesize); - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 1}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 1}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(grayscaleRequestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 1); @@ -178,7 +177,7 @@ TYPED_TEST(BinaryUtilsTest, positive_grayscale) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), grayscale_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_batch_size_2) { +TYPED_TEST(NativeFileInputConversionTest, positive_batch_size_2) { uint8_t rgb_batchsize_2_tensor[] = {0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed}; ov::Tensor tensor; @@ -190,7 +189,7 @@ TYPED_TEST(BinaryUtilsTest, positive_batch_size_2) { this->prepareBinaryTensor(batchSize2RequestTensor, image_bytes, filesize, batchsize); for (const auto layout : std::vector{Layout("NHWC"), Layout::getDefaultLayout()}) { - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, layout); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, layout); 
ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(batchSize2RequestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 6); @@ -199,12 +198,12 @@ TYPED_TEST(BinaryUtilsTest, positive_batch_size_2) { } } -TYPED_TEST(BinaryUtilsTest, positive_precision_changed) { +TYPED_TEST(NativeFileInputConversionTest, positive_precision_changed) { uint8_t rgb_precision_changed_expected_tensor[] = {0x24, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0xed, 0x00, 0x00, 0x00}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::I32, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::I32, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 3); @@ -213,12 +212,12 @@ TYPED_TEST(BinaryUtilsTest, positive_precision_changed) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size() * I32_size, rgb_precision_changed_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_nhwc_layout) { +TYPED_TEST(NativeFileInputConversionTest, positive_nhwc_layout) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 3); @@ -227,18 +226,18 @@ TYPED_TEST(BinaryUtilsTest, positive_nhwc_layout) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, layout_default_resolution_mismatch) { +TYPED_TEST(NativeFileInputConversionTest, layout_default_resolution_mismatch) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 3, 1, 3}, Layout::getDefaultLayout()); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 3, 1, 3}, Layout::getDefaultLayout()); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::INVALID_SHAPE); } -TYPED_TEST(BinaryUtilsTest, positive_resizing) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 2, 2, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 2, 2, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 12); @@ -246,12 +245,12 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_smaller) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_cols_smaller) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {2, 5}, 3}, Layout{"NHWC"}); + auto 
tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {2, 5}, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -262,7 +261,7 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_smaller) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_bigger) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_cols_bigger) { uint8_t rgb_expected_tensor[] = {0x96, 0x8f, 0xf3, 0x98, 0x9a, 0x81, 0x9d, 0xa9, 0x12}; size_t filesize; @@ -274,7 +273,7 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_bigger) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {1, 3}, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {1, 3}, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensor4x4, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -285,12 +284,12 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_bigger) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_in_range) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_cols_in_range) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {1, 3}, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, {1, 3}, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -301,12 +300,12 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_cols_in_range) EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_smaller) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_rows_smaller) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {2, 5}, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {2, 5}, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -317,7 +316,7 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_smaller) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_bigger) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_rows_bigger) { uint8_t rgb_expected_tensor[] = {0x3f, 0x65, 0x88, 0x98, 0x9a, 0x81, 0xf5, 0xd2, 0x7c}; size_t filesize; @@ -329,7 +328,7 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_bigger) { ov::Tensor tensor; - std::shared_ptr tensorInfo 
= std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensor4x4, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -340,12 +339,12 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_bigger) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_in_range) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_dynamic_shape_rows_in_range) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -356,12 +355,12 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_dynamic_shape_rows_in_range) EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_any_shape) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_any_shape) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), ovms::Dimension::any(), 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), ovms::Dimension::any(), 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -374,18 +373,18 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_any_shape) { EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TYPED_TEST(BinaryUtilsTest, negative_resizing_with_one_any_one_static_shape) { +TYPED_TEST(NativeFileInputConversionTest, negative_resizing_with_one_any_one_static_shape) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), 4, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), 4, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::INVALID_SHAPE); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_one_any_one_static_shape) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_one_any_one_static_shape) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, ovms::Dimension::any(), 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -396,11 +395,11 @@ TYPED_TEST(BinaryUtilsTest, 
positive_resizing_with_one_any_one_static_shape) { ASSERT_EQ(tensor.get_size(), 3); } -TYPED_TEST(BinaryUtilsTest, positive_resizing_with_demultiplexer_and_range_resolution) { +TYPED_TEST(NativeFileInputConversionTest, positive_resizing_with_demultiplexer_and_range_resolution) { ov::Tensor tensor; const int batchSize = 5; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, {1, 3}, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, {1, 3}, {1, 3}, 3}, Layout{"NHWC"}); tensorInfo = tensorInfo->createCopyWithDemultiplexerDimensionPrefix(batchSize); size_t filesize; @@ -420,7 +419,7 @@ TYPED_TEST(BinaryUtilsTest, positive_resizing_with_demultiplexer_and_range_resol ASSERT_EQ(tensor.get_size(), batchSize * 1 * 3 * 3 * 3); } -TYPED_TEST(BinaryUtilsTest, positive_range_resolution_matching_in_between) { +TYPED_TEST(NativeFileInputConversionTest, positive_range_resolution_matching_in_between) { ov::Tensor tensor; const int batchSize = 5; @@ -432,7 +431,7 @@ TYPED_TEST(BinaryUtilsTest, positive_range_resolution_matching_in_between) { this->prepareBinaryTensor(requestTensor4x4, image_bytes, filesize, batchSize); for (const auto& batchDim : std::vector{Dimension::any(), Dimension(batchSize)}) { - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{batchDim, {1, 5}, {1, 5}, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{batchDim, {1, 5}, {1, 5}, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(requestTensor4x4, tensor, tensorInfo, nullptr), ovms::StatusCode::OK); shape_t tensorDims = tensor.get_shape(); @@ -444,7 +443,7 @@ TYPED_TEST(BinaryUtilsTest, positive_range_resolution_matching_in_between) { } } -class BinaryUtilsTFSPrecisionTest : public ::testing::TestWithParam { +class NativeFileInputConversionTFSPrecisionTest : public ::testing::TestWithParam { protected: void SetUp() override { readRgbJpg(filesize, image_bytes); @@ -457,13 +456,13 @@ class BinaryUtilsTFSPrecisionTest : public ::testing::TestWithParam tensorInfo = std::make_shared("", + auto tensorInfo = std::make_shared("", testedPrecision, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); @@ -475,10 +474,10 @@ TEST_P(BinaryUtilsTFSValidPrecisionTest, Valid) { ASSERT_EQ(tensor.get_element_type(), ovmsPrecisionToIE2Precision(testedPrecision)); } -TEST_P(BinaryUtilsTFSInvalidPrecisionTest, Invalid) { +TEST_P(NativeFileInputConversionTFSInvalidPrecisionTest, Invalid) { ovms::Precision testedPrecision = GetParam(); - std::shared_ptr tensorInfo = std::make_shared("", + auto tensorInfo = std::make_shared("", testedPrecision, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); @@ -507,9 +506,9 @@ const std::vector BINARY_SUPPORTED_INPUT_PRECISIONS{ INSTANTIATE_TEST_SUITE_P( Test, - BinaryUtilsTFSValidPrecisionTest, + NativeFileInputConversionTFSValidPrecisionTest, ::testing::ValuesIn(BINARY_SUPPORTED_INPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return toString(info.param); }); @@ -533,13 +532,13 @@ static const std::vector BINARY_UNSUPPORTED_INPUT_PRECISIONS{ INSTANTIATE_TEST_SUITE_P( Test, - BinaryUtilsTFSInvalidPrecisionTest, + NativeFileInputConversionTFSInvalidPrecisionTest, ::testing::ValuesIn(BINARY_UNSUPPORTED_INPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return toString(info.param); }); -class BinaryUtilsKFSPrecisionTest : public 
::testing::TestWithParam { +class NativeFileInputConversionKFSPrecisionTest : public ::testing::TestWithParam { protected: void SetUp() override { readRgbJpg(filesize, image_bytes); @@ -551,13 +550,13 @@ class BinaryUtilsKFSPrecisionTest : public ::testing::TestWithParam tensorInfo = std::make_shared("", + auto tensorInfo = std::make_shared("", testedPrecision, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); @@ -569,10 +568,10 @@ TEST_P(BinaryUtilsKFSValidPrecisionTest, Valid) { ASSERT_EQ(tensor.get_element_type(), ovmsPrecisionToIE2Precision(testedPrecision)); } -TEST_P(BinaryUtilsKFSInvalidPrecisionTest, Invalid) { +TEST_P(NativeFileInputConversionKFSInvalidPrecisionTest, Invalid) { ovms::Precision testedPrecision = GetParam(); - std::shared_ptr tensorInfo = std::make_shared("", + auto tensorInfo = std::make_shared("", testedPrecision, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); @@ -583,21 +582,21 @@ TEST_P(BinaryUtilsKFSInvalidPrecisionTest, Invalid) { INSTANTIATE_TEST_SUITE_P( Test, - BinaryUtilsKFSValidPrecisionTest, + NativeFileInputConversionKFSValidPrecisionTest, ::testing::ValuesIn(BINARY_SUPPORTED_INPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return toString(info.param); }); INSTANTIATE_TEST_SUITE_P( Test, - BinaryUtilsKFSInvalidPrecisionTest, + NativeFileInputConversionKFSInvalidPrecisionTest, ::testing::ValuesIn(BINARY_UNSUPPORTED_INPUT_PRECISIONS), - [](const ::testing::TestParamInfo& info) { + [](const ::testing::TestParamInfo& info) { return toString(info.param); }); -class BinaryUtilsTestKFSRawInputsContents : public ::testing::Test { +class NativeFileInputConversionTestKFSRawInputsContents : public ::testing::Test { public: ::KFSRequest::InferInputTensor requestTensor; std::string buffer; @@ -609,37 +608,248 @@ class BinaryUtilsTestKFSRawInputsContents : public ::testing::Test { std::unique_ptr image_bytes; readRgbJpg(filesize, image_bytes); + uint8_t imageSize[] = {0x9E, 0x02, 0x00, 0x00}; + buffer.append((char*)imageSize, 4); buffer.append(image_bytes.get(), filesize); } }; -TEST_F(BinaryUtilsTestKFSRawInputsContents, Positive) { +TEST_F(NativeFileInputConversionTestKFSRawInputsContents, Positive) { uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed}; ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); - + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &this->buffer), ovms::StatusCode::OK); ASSERT_EQ(tensor.get_size(), 3); uint8_t* ptr = static_cast(tensor.data()); EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); } -TEST_F(BinaryUtilsTestKFSRawInputsContents, Negative_batchSizeBiggerThan1) { +TEST_F(NativeFileInputConversionTestKFSRawInputsContents, Positive_batchSizeBiggerThan1) { + uint8_t rgb_expected_tensor[] = {0x24, 0x1b, 0xed, 0x24, 0x1b, 0xed}; + this->requestTensor.mutable_shape()->Clear(); + this->requestTensor.mutable_shape()->Add(2); + + size_t filesize; + std::unique_ptr image_bytes; + readRgbJpg(filesize, image_bytes); + uint8_t imageSize[] = {0x9E, 0x02, 0x00, 0x00}; + this->buffer.append((char*)imageSize, 4); + this->buffer.append(image_bytes.get(), filesize); + ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", 
ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, Layout{"NHWC"}); - ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &this->buffer), ovms::StatusCode::INVALID_BATCH_SIZE); + ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &this->buffer), ovms::StatusCode::OK); + ASSERT_EQ(tensor.get_size(), 6); + uint8_t* ptr = static_cast(tensor.data()); + EXPECT_EQ(std::equal(ptr, ptr + tensor.get_size(), rgb_expected_tensor), true); +} + +TEST_F(NativeFileInputConversionTestKFSRawInputsContents, Negative_batchSizeBiggerThan1WithEmptyString) { + this->requestTensor.mutable_shape()->Clear(); + this->requestTensor.mutable_shape()->Add(2); + + uint8_t imageSize[] = {0x00, 0x00, 0x00, 0x00}; + this->buffer.append((char*)imageSize, 4); + + ov::Tensor tensor; + + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, Layout{"NHWC"}); + + ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &this->buffer), ovms::StatusCode::IMAGE_PARSING_FAILED); } -TEST_F(BinaryUtilsTestKFSRawInputsContents, Negative_emptyString) { +TEST_F(NativeFileInputConversionTestKFSRawInputsContents, Negative_emptyString) { ov::Tensor tensor; - std::shared_ptr tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{1, 1, 1, 3}, Layout{"NHWC"}); std::string empty; - ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &empty), ovms::StatusCode::BYTES_CONTENTS_EMPTY); + ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &empty), ovms::StatusCode::INVALID_BATCH_SIZE); } + +TEST_F(NativeFileInputConversionTestKFSRawInputsContents, Negative_invalidFormat) { + this->requestTensor.mutable_shape()->Clear(); + this->requestTensor.mutable_shape()->Add(2); + + uint8_t imageSize[] = {0x01, 0x00, 0x00, 0x00}; + this->buffer.append((char*)imageSize, 4); + + ov::Tensor tensor; + auto tensorInfo = std::make_shared("", ovms::Precision::U8, ovms::Shape{2, 1, 1, 3}, Layout{"NHWC"}); + + ASSERT_EQ(convertNativeFileFormatRequestTensorToOVTensor(this->requestTensor, tensor, tensorInfo, &this->buffer), ovms::StatusCode::INVALID_BATCH_SIZE); +} + +template +class StringInputsConversionTest : public ::testing::Test { +public: + TensorType requestTensor; + void SetUp() override {} + + void prepareStringTensor(tensorflow::TensorProto& tensor, std::vector inputStrings) { + prepareInferStringTensor(tensor, "UNUSED", inputStrings, true, nullptr); + } + void prepareStringTensor(::KFSRequest::InferInputTensor& tensor, std::vector inputStrings) { + prepareInferStringTensor(tensor, "UNUSED", inputStrings, true, nullptr); + } + + void prepareStringTensorWithRawInputContents(tensorflow::TensorProto& tensor, std::vector inputStrings, std::string& buffer) { + SPDLOG_ERROR("RawInputContents not supported for TFS API"); + } + void prepareStringTensorWithRawInputContents(::KFSRequest::InferInputTensor& tensor, std::vector inputStrings, std::string& buffer) { + prepareInferStringTensor(tensor, "UNUSED", inputStrings, false, &buffer); + } +}; + +using MyTypes = ::testing::Types; +TYPED_TEST_SUITE(StringInputsConversionTest, MyTypes); + +TYPED_TEST(StringInputsConversionTest, positive) { + std::vector expectedStrings = {"String_123"}; + this->prepareStringTensor(this->requestTensor, expectedStrings); + 
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, nullptr), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, rawInputContents_positive) {
+    if (typeid(TypeParam) == typeid(TFSInputTensorType))
+        GTEST_SKIP() << "String inputs in buffer not supported for TFS api";
+    std::vector<std::string> expectedStrings = {"String_123"};
+    std::string rawInputContents;
+    this->prepareStringTensorWithRawInputContents(this->requestTensor, expectedStrings, rawInputContents);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, &rawInputContents), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, positive_batch_size_2) {
+    std::vector<std::string> expectedStrings = {"String_123", "zebra"};
+    this->prepareStringTensor(this->requestTensor, expectedStrings);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, nullptr), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, rawInputContents_positive_batch_size_2) {
+    if (typeid(TypeParam) == typeid(TFSInputTensorType))
+        GTEST_SKIP() << "String inputs in buffer not supported for TFS api";
+    std::vector<std::string> expectedStrings = {"String_123", "zebra"};
+    std::string rawInputContents;
+    this->prepareStringTensorWithRawInputContents(this->requestTensor, expectedStrings, rawInputContents);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, &rawInputContents), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, positive_batch_size_3_one_string_empty) {
+    std::vector<std::string> expectedStrings = {"String_123", "zebra", ""};
+    this->prepareStringTensor(this->requestTensor, expectedStrings);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, nullptr), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, rawInputContents_positive_batch_size_3_one_string_empty) {
+    if (typeid(TypeParam) == typeid(TFSInputTensorType))
+        GTEST_SKIP() << "String inputs in buffer not supported for TFS api";
+    std::vector<std::string> expectedStrings = {"String_123", "zebra", ""};
+    std::string rawInputContents;
+    this->prepareStringTensorWithRawInputContents(this->requestTensor, expectedStrings, rawInputContents);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, &rawInputContents), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, positive_empty_inputs) {
+    // This case can't happen because request validation doesn't allow empty strings
+    std::vector<std::string> expectedStrings = {};
+    this->prepareStringTensor(this->requestTensor, expectedStrings);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, nullptr), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, rawInputContents_positive_empty_inputs) {
+    // This case can't happen because request validation doesn't allow empty strings
+    if (typeid(TypeParam) == typeid(TFSInputTensorType))
+        GTEST_SKIP() << "String inputs in buffer not supported for TFS api";
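+    // With no strings prepared, conversion is still expected to succeed and yield an empty (zero-batch) tensor.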
+    std::vector<std::string> expectedStrings = {};
+    std::string rawInputContents;
+    this->prepareStringTensorWithRawInputContents(this->requestTensor, expectedStrings, rawInputContents);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor2D(this->requestTensor, tensor, &rawInputContents), ovms::StatusCode::OK);
+    assertOutputTensorMatchExpectations(tensor, expectedStrings);
+}
+
+TYPED_TEST(StringInputsConversionTest, u8_1d) {
+    std::vector<std::string> expectedStrings = {"ala", "", "ma", "kota"};
+    this->prepareStringTensor(this->requestTensor, expectedStrings);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor1D(this->requestTensor, tensor, nullptr), ovms::StatusCode::OK);
+    ASSERT_EQ(tensor.get_element_type(), ov::element::u8);
+    ASSERT_EQ(tensor.get_size(), 33);
+    std::vector<uint8_t> expectedData = {
+        4, 0, 0, 0,  // batch size
+        0, 0, 0, 0,  // first string start offset
+        3, 0, 0, 0,  // end of "ala" in condensed content
+        3, 0, 0, 0,  // end of "" in condensed content
+        5, 0, 0, 0,  // end of "ma" in condensed content
+        9, 0, 0, 0,  // end of "kota" in condensed content
+        'a', 'l', 'a',
+        'm', 'a',
+        'k', 'o', 't', 'a'};
+    ASSERT_EQ(std::memcmp(reinterpret_cast<uint8_t*>(tensor.data()), expectedData.data(), expectedData.size()), 0)
+        << readableError(reinterpret_cast<uint8_t*>(tensor.data()), expectedData.data(), expectedData.size());
+}
+
+TYPED_TEST(StringInputsConversionTest, rawInputContents_u8_1d) {
+    if (typeid(TypeParam) == typeid(TFSInputTensorType))
+        GTEST_SKIP() << "String inputs in buffer not supported for TFS api";
+    std::vector<std::string> expectedStrings = {"ala", "", "ma", "kota"};
+    std::string rawInputContents;
+    this->prepareStringTensorWithRawInputContents(this->requestTensor, expectedStrings, rawInputContents);
+    ov::Tensor tensor;
+    ASSERT_EQ(convertStringRequestToOVTensor1D(this->requestTensor, tensor, &rawInputContents), ovms::StatusCode::OK);
+    ASSERT_EQ(tensor.get_element_type(), ov::element::u8);
+    ASSERT_EQ(tensor.get_size(), 33);
+    std::vector<uint8_t> expectedData = {
+        4, 0, 0, 0,  // batch size
+        0, 0, 0, 0,  // first string start offset
+        3, 0, 0, 0,  // end of "ala" in condensed content
+        3, 0, 0, 0,  // end of "" in condensed content
+        5, 0, 0, 0,  // end of "ma" in condensed content
+        9, 0, 0, 0,  // end of "kota" in condensed content
+        'a', 'l', 'a',
+        'm', 'a',
+        'k', 'o', 't', 'a'};
+    ASSERT_EQ(std::memcmp(reinterpret_cast<uint8_t*>(tensor.data()), expectedData.data(), expectedData.size()), 0)
+        << readableError(reinterpret_cast<uint8_t*>(tensor.data()), expectedData.data(), expectedData.size());
+}
+
+template <typename TensorType>
+class StringOutputsConversionTest : public ::testing::Test {
+public:
+    TensorType responseTensor;
+    void SetUp() override {}
+};
+
+using OutputTensorProtos = ::testing::Types<tensorflow::TensorProto, KFSTensorOutputProto>;
+TYPED_TEST_SUITE(StringOutputsConversionTest, OutputTensorProtos);
+
+TYPED_TEST(StringOutputsConversionTest, positive) {
+    std::vector<uint8_t> _2dTensorData = {
+        'S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0,  // String_123
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                      // ""
+        'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0};           // "zebra"
+    ov::Shape _2dTensorShape = {3, 11};
+    ov::Tensor tensor(ov::element::u8, _2dTensorShape, _2dTensorData.data());
+    ASSERT_EQ(convertOVTensor2DToStringResponse(tensor, this->responseTensor), ovms::StatusCode::OK);
+    assertStringOutputProto(this->responseTensor, {"String_123", "", "zebra"});
+}
+
 } // namespace
diff --git a/src/test/tensorinfo_test.cpp b/src/test/tensorinfo_test.cpp
index edf451a350..2f79bec90f 100644
--- a/src/test/tensorinfo_test.cpp
+++ b/src/test/tensorinfo_test.cpp
@@ -24,14 +24,14 @@ using namespace ovms;
TEST(TensorInfo, Intersection) { // match - auto first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); - auto second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); + auto first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); + auto second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); auto intersect = first->createIntersection(*second); ASSERT_NE(intersect, nullptr); EXPECT_TRUE(intersect->isTensorSpecEqual(TensorInfo("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}))) << intersect->asString(); // precision UNDEFINED - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); - second = std::make_shared("a", "b", Precision::UNDEFINED, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); + second = std::make_shared("a", "b", Precision::UNDEFINED, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); intersect = first->createIntersection(*second); ASSERT_NE(intersect, nullptr); EXPECT_TRUE(intersect->isTensorSpecEqual(TensorInfo("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}))) << intersect->asString(); @@ -43,35 +43,35 @@ TEST(TensorInfo, Intersection) { EXPECT_TRUE(intersect->isTensorSpecEqual(*first)) << intersect->asString(); // uspecified should succeed with any first = TensorInfo::getUnspecifiedTensorInfo(); - second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); + second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); intersect = first->createIntersection(*second); ASSERT_NE(intersect, nullptr); EXPECT_TRUE(intersect->isTensorSpecEqual(*second)) << intersect->asString(); // default layout should match any - first = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout::getDefaultLayout()); - second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout::getDefaultLayout()); + second = std::make_shared("a", "b", Precision::FP32, Shape({1, Dimension::any(), {220, 225}, {200, 300}}), Layout{"NCHW"}); intersect = first->createIntersection(*second); ASSERT_NE(intersect, nullptr); EXPECT_TRUE(intersect->isTensorSpecEqual(*second)) << intersect->asString(); // precision mismatch - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); - second = std::make_shared("a", "b", Precision::I32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + second = std::make_shared("a", "b", Precision::I32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); EXPECT_EQ(first->createIntersection(*second), nullptr); // layout order mismatch - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); - second = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NHWC"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), 
Layout{"NCHW"}); + second = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NHWC"}); EXPECT_EQ(first->createIntersection(*second), nullptr); // name mismatch - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); - second = std::make_shared("a2", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + second = std::make_shared("a2", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); EXPECT_EQ(first->createIntersection(*second), nullptr); // mapped name mismatch - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); - second = std::make_shared("a", "b2", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); + second = std::make_shared("a", "b2", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCHW"}); EXPECT_EQ(first->createIntersection(*second), nullptr); // intersect with demultiplexer - first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"N...H?"}); - second = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCH..."}); + first = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"N...H?"}); + second = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, 224}), Layout{"NCH..."}); intersect = first->createIntersection(*second); ASSERT_NE(intersect, nullptr); intersect = intersect->createCopyWithDemultiplexerDimensionPrefix(Dimension::any()); @@ -79,7 +79,7 @@ TEST(TensorInfo, Intersection) { } TEST(TensorInfo, LayoutWithAppliedDemultiplexer) { - auto info = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); + auto info = std::make_shared("a", "b", Precision::FP32, Shape({1, 3, 224, {220, 230}}), Layout{"NCHW"}); info = info->createCopyWithDemultiplexerDimensionPrefix(100); EXPECT_TRUE(info->isInfluencedByDemultiplexer()); EXPECT_EQ(info->getShape(), (Shape{100, 1, 3, 224, Dimension{220, 230}})) << info->getShape().toString(); diff --git a/src/test/test_utils.cpp b/src/test/test_utils.cpp index 0816089605..8039529944 100644 --- a/src/test/test_utils.cpp +++ b/src/test/test_utils.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "test_utils.hpp" +#include #include #include "../capi_frontend/capi_utils.hpp" @@ -33,6 +34,9 @@ void prepareBinaryPredictRequest(ovms::InferenceRequest& request, const std::str void prepareBinaryPredictRequestNoShape(ovms::InferenceRequest& request, const std::string& inputName, const int batchSize) { throw 42; } // CAPI binary not supported void prepareBinary4x4PredictRequest(ovms::InferenceRequest& request, const std::string& inputName, const int batchSize) { throw 42; } // CAPI binary not supported +void prepareInferStringRequest(ovms::InferenceRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent) { throw 42; } // CAPI binary not supported +void prepareInferStringTensor(ovms::InferenceTensor& tensor, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent, std::string* content) { throw 42; } // CAPI binary not supported + void preparePredictRequest(::KFSRequest& request, inputs_info_t requestInputs, const std::vector& data, bool putBufferInInputTensorContent) { 
request.mutable_inputs()->Clear(); request.mutable_raw_input_contents()->Clear(); @@ -102,17 +106,17 @@ void preparePredictRequest(tensorflow::serving::PredictRequest& request, inputs_ } void waitForOVMSConfigReload(ovms::ModelManager& manager) { - // This is effectively multiplying by 1.2 to have 1 config reload in between + // This is effectively multiplying by 1.8 to have 1 config reload in between // two test steps - const float WAIT_MULTIPLIER_FACTOR = 1.2; + const float WAIT_MULTIPLIER_FACTOR = 1.8; const uint waitTime = WAIT_MULTIPLIER_FACTOR * manager.getWatcherIntervalSec() * 1000; std::this_thread::sleep_for(std::chrono::milliseconds(waitTime)); } void waitForOVMSResourcesCleanup(ovms::ModelManager& manager) { - // This is effectively multiplying by 1.2 to have 1 config reload in between + // This is effectively multiplying by 1.8 to have 1 config reload in between // two test steps - const float WAIT_MULTIPLIER_FACTOR = 1.2; + const float WAIT_MULTIPLIER_FACTOR = 1.8; const uint waitTime = WAIT_MULTIPLIER_FACTOR * manager.getResourcesCleanupIntervalSec() * 1000; std::this_thread::sleep_for(std::chrono::milliseconds(waitTime)); } @@ -189,6 +193,35 @@ void checkDummyResponse(const std::string outputName, << readableError(expected_output, actual_output, dataLengthToCheck / sizeof(float)); } +void checkAddResponse(const std::string outputName, + const std::vector& requestData1, + const std::vector& requestData2, + ::KFSRequest& request, ::KFSResponse& response, int seriesLength, int batchSize, const std::string& servableName) { + ASSERT_EQ(response.model_name(), servableName); + ASSERT_EQ(response.outputs_size(), 1); + ASSERT_EQ(response.raw_output_contents_size(), 1); + ASSERT_EQ(response.outputs().begin()->name(), outputName) << "Did not find:" << outputName; + const auto& output_proto = *response.outputs().begin(); + std::string* content = response.mutable_raw_output_contents(0); + + ASSERT_EQ(content->size(), batchSize * DUMMY_MODEL_OUTPUT_SIZE * sizeof(float)); + ASSERT_EQ(output_proto.shape_size(), 2); + ASSERT_EQ(output_proto.shape(0), batchSize); + ASSERT_EQ(output_proto.shape(1), DUMMY_MODEL_OUTPUT_SIZE); + + std::vector responseData = requestData1; + for (size_t i = 0; i < requestData1.size(); ++i) { + responseData[i] += requestData2[i]; + } + + float* actual_output = (float*)content->data(); + float* expected_output = responseData.data(); + const int dataLengthToCheck = DUMMY_MODEL_OUTPUT_SIZE * batchSize * sizeof(float); + EXPECT_EQ(actual_output[0], expected_output[0]); + EXPECT_EQ(0, std::memcmp(actual_output, expected_output, dataLengthToCheck)) + << readableError(expected_output, actual_output, dataLengthToCheck / sizeof(float)); +} + void checkIncrement4DimShape(const std::string outputName, PredictResponse& response, const std::vector& expectedShape) { @@ -273,6 +306,144 @@ void read4x4RgbJpg(size_t& filesize, std::unique_ptr& image_bytes) { return readImage("/ovms/src/test/binaryutils/rgb4x4.jpg", filesize, image_bytes); } +void prepareInferStringTensor(::KFSRequest::InferInputTensor& tensor, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent, std::string* content) { + if (!putBufferInInputTensorContent && content == nullptr) { + throw std::runtime_error("Preparation of infer string tensor failed"); + return; + } + tensor.set_name(name); + tensor.set_datatype("BYTES"); + tensor.mutable_shape()->Clear(); + tensor.add_shape(data.size()); + if (!putBufferInInputTensorContent) { + size_t dataSize = 0; + for (auto input : data) { + 
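+            // Each element is serialized as a 4-byte size prefix (uint32_t) followed by the string bytes;
+            // this is the raw_input_contents layout the KFS BYTES string-conversion tests above feed in.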
dataSize += input.size() + 4; + } + content->resize(dataSize); + size_t offset = 0; + for (auto input : data) { + uint32_t inputSize = input.size(); + std::memcpy(content->data() + offset, reinterpret_cast(&inputSize), sizeof(uint32_t)); + offset += sizeof(uint32_t); + std::memcpy(content->data() + offset, input.data(), input.length()); + offset += input.length(); + } + } else { + for (auto inputData : data) { + auto bytes_val = tensor.mutable_contents()->mutable_bytes_contents()->Add(); + bytes_val->append(inputData.data(), inputData.size()); + } + } +} + +void prepareInferStringRequest(::KFSRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent) { + auto it = request.mutable_inputs()->begin(); + size_t bufferId = 0; + while (it != request.mutable_inputs()->end()) { + if (it->name() == name) + break; + ++it; + ++bufferId; + } + KFSTensorInputProto* tensor; + std::string* content = nullptr; + if (it != request.mutable_inputs()->end()) { + tensor = &*it; + if (!putBufferInInputTensorContent) { + content = request.mutable_raw_input_contents()->Mutable(bufferId); + } + } else { + tensor = request.add_inputs(); + if (!putBufferInInputTensorContent) { + content = request.add_raw_input_contents(); + } + } + prepareInferStringTensor(*tensor, name, data, putBufferInInputTensorContent, content); +} + +void prepareInferStringTensor(tensorflow::TensorProto& tensor, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent, std::string* content) { + tensor.set_dtype(tensorflow::DataType::DT_STRING); + tensor.mutable_tensor_shape()->add_dim()->set_size(data.size()); + for (auto inputData : data) { + tensor.add_string_val(inputData); + } +} + +void prepareInferStringRequest(tensorflow::serving::PredictRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent) { + request.mutable_inputs()->clear(); + auto& input = (*request.mutable_inputs())[name]; + prepareInferStringTensor(input, name, data, putBufferInInputTensorContent, nullptr); +} + +void assertOutputTensorMatchExpectations(const ov::Tensor& tensor, std::vector expectedStrings) { + size_t maxStringLength = 0; + for (const auto& input : expectedStrings) { + maxStringLength = std::max(maxStringLength, input.size()); + } + size_t width = maxStringLength + 1; + size_t i = 0; + ASSERT_EQ(tensor.get_shape().size(), 2); + ASSERT_EQ(tensor.get_shape()[0], expectedStrings.size()); + ASSERT_EQ(tensor.get_shape()[1], width); + ASSERT_EQ(tensor.get_size(), (width * expectedStrings.size())); + for (const auto& input : expectedStrings) { + for (size_t j = 0; j < input.size(); j++) { + ASSERT_EQ( + tensor.data()[i * width + j], + reinterpret_cast(input.data())[j]) + << "Tensor data does not match expectations for input: " << input << " at index: " << i << " and position: " << j; + } + for (size_t j = input.size(); j < width; j++) { + ASSERT_EQ(tensor.data()[i * width + j], 0); + } + i++; + } +} + +void assertStringOutputProto(const tensorflow::TensorProto& proto, const std::vector& expectedStrings) { + ASSERT_EQ(proto.string_val_size(), expectedStrings.size()); + for (size_t i = 0; i < expectedStrings.size(); i++) { + ASSERT_EQ(proto.string_val(i), expectedStrings[i]); + } +} +void assertStringOutputProto(const KFSTensorOutputProto& proto, const std::vector& expectedStrings) { + ASSERT_EQ(proto.contents().bytes_contents_size(), expectedStrings.size()); + for (size_t i = 0; i < expectedStrings.size(); i++) { + 
ASSERT_EQ(proto.contents().bytes_contents(i), expectedStrings[i]); + } +} +void assertStringOutputProto(const ovms::InferenceTensor& proto, const std::vector& expectedStrings) { + FAIL() << "not implemented"; +} + +void assertStringResponse(const tensorflow::serving::PredictResponse& proto, const std::vector& expectedStrings, const std::string& outputName) { + ASSERT_EQ(proto.outputs().count(outputName), 1); + ASSERT_EQ(proto.outputs().at(outputName).dtype(), tensorflow::DataType::DT_STRING); + ASSERT_EQ(proto.outputs().at(outputName).tensor_shape().dim_size(), 1); + ASSERT_EQ(proto.outputs().at(outputName).tensor_shape().dim(0).size(), expectedStrings.size()); + assertStringOutputProto(proto.outputs().at(outputName), expectedStrings); +} +void assertStringResponse(const ::KFSResponse& proto, const std::vector& expectedStrings, const std::string& outputName) { + ASSERT_EQ(proto.outputs_size(), 1); + ASSERT_EQ(proto.outputs(0).name(), outputName); + ASSERT_EQ(proto.outputs(0).datatype(), "BYTES"); + ASSERT_EQ(proto.outputs(0).shape_size(), 1); + ASSERT_EQ(proto.outputs(0).shape(0), expectedStrings.size()); + std::string expectedString; + for (auto str : expectedStrings) { + int size = str.size(); + for (int k = 0; k < 4; k++, size >>= 8) { + expectedString += static_cast(size & 0xff); + } + expectedString.append(str); + } + ASSERT_EQ(memcmp(proto.raw_output_contents(0).data(), expectedString.data(), expectedString.size()), 0); +} +void assertStringResponse(const ovms::InferenceResponse& proto, const std::vector& expectedStrings, const std::string& outputName) { + FAIL() << "not implemented"; +} + void prepareBinaryPredictRequest(tensorflow::serving::PredictRequest& request, const std::string& inputName, const int batchSize) { auto& tensor = (*request.mutable_inputs())[inputName]; size_t filesize = 0; @@ -380,7 +551,7 @@ std::string* findKFSInferInputTensorContentInRawInputs(::KFSRequest& request, co } return content; } -void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, bool putBufferInInputTensorContent) { auto [shape, type] = inputInfo; prepareKFSInferInputTensor(request, name, @@ -388,7 +559,7 @@ void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, data, putBufferInInputTensorContent); } -void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, uint32_t decrementBufferSize, OVMS_BufferType bufferType, std::optional deviceId) { auto [shape, type] = inputInfo; prepareCAPIInferInputTensor(request, name, @@ -396,7 +567,7 @@ void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::str data, decrementBufferSize, bufferType, deviceId); } -void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, uint32_t decrementBufferSize, OVMS_BufferType bufferType, std::optional deviceId) { auto [shape, datatype] = inputInfo; size_t elementsCount = 1; @@ -411,7 +582,7 @@ void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::str 
elementsCount *= dim; } - request.addInput(name.c_str(), datatype, reinterpret_cast(shape.data()), shape.size()); + request.addInput(name.c_str(), datatype, shape.data(), shape.size()); size_t dataSize = 0; if (isShapeNegative) { @@ -425,7 +596,7 @@ void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::str request.setInputBuffer(name.c_str(), data.data(), dataSize, bufferType, deviceId); } -void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, bool putBufferInInputTensorContent) { auto it = request.mutable_inputs()->begin(); size_t bufferId = 0; @@ -548,6 +719,22 @@ void randomizePort(std::string& port) { std::uniform_int_distribution<> dist{0, 9}; for (auto j : {1, 2, 3}) { char* digitToRandomize = (char*)port.c_str() + j; - *digitToRandomize += dist(eng); + *digitToRandomize = '0' + dist(eng); + } +} +void randomizePorts(std::string& port1, std::string& port2) { + randomizePort(port1); + randomizePort(port2); + while (port2 == port1) { + randomizePort(port2); } } + +std::shared_ptr createTensorInfoCopyWithPrecision(std::shared_ptr src, ovms::Precision newPrecision) { + return std::make_shared( + src->getName(), + src->getMappedName(), + newPrecision, + src->getShape(), + src->getLayout()); +} diff --git a/src/test/test_utils.hpp b/src/test/test_utils.hpp index 8b0f0a24be..802aab2f29 100644 --- a/src/test/test_utils.hpp +++ b/src/test/test_utils.hpp @@ -43,15 +43,17 @@ #include "../metric_registry.hpp" #include "../modelinstance.hpp" #include "../modelmanager.hpp" +#include "../shape.hpp" #include "../tensorinfo.hpp" -using signed_shape_t = std::vector; -using inputs_info_t = std::map>; +using inputs_info_t = std::map>; const std::string dummy_model_location = std::filesystem::current_path().u8string() + "/src/test/dummy"; const std::string dummy_fp64_model_location = std::filesystem::current_path().u8string() + "/src/test/dummy_fp64"; const std::string sum_model_location = std::filesystem::current_path().u8string() + "/src/test/add_two_inputs_model"; const std::string increment_1x3x4x5_model_location = std::filesystem::current_path().u8string() + "/src/test/increment_1x3x4x5"; +const std::string passthrough_model_location = std::filesystem::current_path().u8string() + "/src/test/passthrough"; +const std::string dummy_saved_model_location = std::filesystem::current_path().u8string() + "/src/test/dummy_saved_model"; const ovms::ModelConfig DUMMY_MODEL_CONFIG{ "dummy", @@ -113,12 +115,42 @@ const ovms::ModelConfig INCREMENT_1x3x4x5_MODEL_CONFIG{ increment_1x3x4x5_model_location, // local path }; +const ovms::ModelConfig PASSTHROUGH_MODEL_CONFIG{ + "passthrough", + passthrough_model_location, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + true, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number + "", // cache directory + 1, // model_version unused since version are read from path + passthrough_model_location, // local path +}; + +const ovms::ModelConfig DUMMY_SAVED_MODEL_CONFIG{ + "dummy_saved_model", + dummy_saved_model_location, // base path + "CPU", // target device + "1", // batchsize + 1, // NIREQ + false, // is stateful + true, // idle sequence cleanup enabled + false, // low latency transformation enabled + 500, // stateful sequence max number + "", // cache 
directory + 1, // model_version unused since version are read from path + dummy_saved_model_location, // local path +}; + constexpr const char* DUMMY_MODEL_INPUT_NAME = "b"; constexpr const char* DUMMY_MODEL_OUTPUT_NAME = "a"; constexpr const int DUMMY_MODEL_INPUT_SIZE = 10; constexpr const int DUMMY_MODEL_OUTPUT_SIZE = 10; constexpr const float DUMMY_ADDITION_VALUE = 1.0; -const signed_shape_t DUMMY_MODEL_SHAPE{1, 10}; +const ovms::signed_shape_t DUMMY_MODEL_SHAPE{1, 10}; const ovms::Shape DUMMY_MODEL_SHAPE_META{1, 10}; constexpr const char* DUMMY_FP64_MODEL_INPUT_NAME = "input:0"; @@ -134,6 +166,9 @@ constexpr const char* INCREMENT_1x3x4x5_MODEL_INPUT_NAME = "input"; constexpr const char* INCREMENT_1x3x4x5_MODEL_OUTPUT_NAME = "output"; constexpr const float INCREMENT_1x3x4x5_ADDITION_VALUE = 1.0; +constexpr const char* PASSTHROUGH_MODEL_INPUT_NAME = "input"; +constexpr const char* PASSTHROUGH_MODEL_OUTPUT_NAME = "copy:0"; + const std::string UNUSED_SERVABLE_NAME = "UNUSED_SERVABLE_NAME"; constexpr const ovms::model_version_t UNUSED_MODEL_VERSION = 42; // Answer to the Ultimate Question of Life @@ -161,14 +196,14 @@ void preparePredictRequest(tensorflow::serving::PredictRequest& request, inputs_ KFSTensorInputProto* findKFSInferInputTensor(::KFSRequest& request, const std::string& name); std::string* findKFSInferInputTensorContentInRawInputs(::KFSRequest& request, const std::string& name); -void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data = {}, bool putBufferInInputTensorContent = false); -void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareKFSInferInputTensor(::KFSRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data = {}, bool putBufferInInputTensorContent = false); -void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, uint32_t decrementBufferSize = 0, OVMS_BufferType bufferType = OVMS_BUFFERTYPE_CPU, std::optional deviceId = std::nullopt); -void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, +void prepareCAPIInferInputTensor(ovms::InferenceRequest& request, const std::string& name, const std::tuple& inputInfo, const std::vector& data, uint32_t decrementBufferSize = 0, OVMS_BufferType bufferType = OVMS_BUFFERTYPE_CPU, std::optional deviceId = std::nullopt); void preparePredictRequest(::KFSRequest& request, inputs_info_t requestInputs, const std::vector& data = {}, bool putBufferInInputTensorContent = false); @@ -176,6 +211,16 @@ void preparePredictRequest(::KFSRequest& request, inputs_info_t requestInputs, c void preparePredictRequest(ovms::InferenceRequest& request, inputs_info_t requestInputs, const std::vector& data, uint32_t decrementBufferSize = 0, OVMS_BufferType bufferType = OVMS_BUFFERTYPE_CPU, std::optional deviceId = std::nullopt); +void prepareInferStringTensor(::KFSRequest::InferInputTensor& tensor, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent, std::string* content); +void prepareInferStringTensor(tensorflow::TensorProto& tensor, const std::string& name, const 
std::vector& data, bool putBufferInInputTensorContent, std::string* content); +void prepareInferStringTensor(ovms::InferenceTensor& tensor, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent, std::string* content); + +void prepareInferStringRequest(::KFSRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent = true); +void prepareInferStringRequest(tensorflow::serving::PredictRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent = true); +void prepareInferStringRequest(ovms::InferenceRequest& request, const std::string& name, const std::vector& data, bool putBufferInInputTensorContent = true); // CAPI binary not supported + +void assertOutputTensorMatchExpectations(const ov::Tensor& tensor, std::vector expectedStrings); + void prepareBinaryPredictRequest(tensorflow::serving::PredictRequest& request, const std::string& inputName, const int batchSize); void prepareBinaryPredictRequest(::KFSRequest& request, const std::string& inputName, const int batchSize); void prepareBinaryPredictRequest(ovms::InferenceRequest& request, const std::string& inputName, const int batchSize); // CAPI binary not supported @@ -210,12 +255,25 @@ std::string readableError(const T* expected_output, const T* actual_output, cons return ss.str(); } +void assertStringOutputProto(const tensorflow::TensorProto& proto, const std::vector& expectedStrings); +void assertStringOutputProto(const KFSTensorOutputProto& proto, const std::vector& expectedStrings); +void assertStringOutputProto(const ovms::InferenceTensor& proto, const std::vector& expectedStrings); + +void assertStringResponse(const tensorflow::serving::PredictResponse& proto, const std::vector& expectedStrings, const std::string& outputName); +void assertStringResponse(const ::KFSResponse& proto, const std::vector& expectedStrings, const std::string& outputName); +void assertStringResponse(const ovms::InferenceResponse& proto, const std::vector& expectedStrings, const std::string& outputName); + +void checkAddResponse(const std::string outputName, + const std::vector& requestData1, + const std::vector& requestData2, + ::KFSRequest& request, ::KFSResponse& response, int seriesLength, int batchSize, const std::string& servableName); + template void checkIncrement4DimResponse(const std::string outputName, const std::vector& expectedData, - tensorflow::serving::PredictRequest& request, tensorflow::serving::PredictResponse& response, - const std::vector& expectedShape) { + const std::vector& expectedShape, + bool checkRaw = true) { ASSERT_EQ(response.outputs().count(outputName), 1) << "Did not find:" << outputName; const auto& output_proto = response.outputs().at(outputName); @@ -237,26 +295,33 @@ void checkIncrement4DimResponse(const std::string outputName, template void checkIncrement4DimResponse(const std::string outputName, const std::vector& expectedData, - ::KFSRequest& request, ::KFSResponse& response, - const std::vector& expectedShape) { + const std::vector& expectedShape, + bool checkRaw = true) { ASSERT_EQ(response.outputs_size(), 1); - ASSERT_EQ(response.raw_output_contents_size(), 1); ASSERT_EQ(response.mutable_outputs(0)->name(), outputName); - - auto elementsCount = std::accumulate(expectedShape.begin(), expectedShape.end(), 1, std::multiplies()); - - ASSERT_EQ(response.raw_output_contents(0).size(), elementsCount * sizeof(T)); ASSERT_EQ(response.outputs(0).shape_size(), expectedShape.size()); for (size_t i = 0; i < 
expectedShape.size(); i++) { ASSERT_EQ(response.outputs(0).shape(i), expectedShape[i]); } - T* actual_output = (T*)response.raw_output_contents(0).data(); - T* expected_output = (T*)expectedData.data(); - const int dataLengthToCheck = elementsCount * sizeof(T); - EXPECT_EQ(0, std::memcmp(actual_output, expected_output, dataLengthToCheck)) - << readableError(expected_output, actual_output, dataLengthToCheck / sizeof(T)); + if (checkRaw) { + ASSERT_EQ(response.raw_output_contents_size(), 1); + auto elementsCount = std::accumulate(expectedShape.begin(), expectedShape.end(), 1, std::multiplies()); + ASSERT_EQ(response.raw_output_contents(0).size(), elementsCount * sizeof(T)); + T* actual_output = (T*)response.raw_output_contents(0).data(); + T* expected_output = (T*)expectedData.data(); + const int dataLengthToCheck = elementsCount * sizeof(T); + EXPECT_EQ(0, std::memcmp(actual_output, expected_output, dataLengthToCheck)) + << readableError(expected_output, actual_output, dataLengthToCheck / sizeof(T)); + } else { + ASSERT_EQ(response.outputs(0).datatype(), "UINT8") << "other precision testing currently not supported"; + ASSERT_EQ(sizeof(T), 1) << "other precision testing currently not supported"; + ASSERT_EQ(response.outputs(0).contents().uint_contents_size(), expectedData.size()); + for (size_t i = 0; i < expectedData.size(); i++) { + ASSERT_EQ(response.outputs(0).contents().uint_contents(i), expectedData[i]); + } + } } void checkIncrement4DimShape(const std::string outputName, @@ -579,8 +644,11 @@ static const std::vector UNSUPPORTED_CAPI_INPUT_PRECISIONS_TENS }; void randomizePort(std::string& port); +void randomizePorts(std::string& port1, std::string& port2); class ConstructorEnabledConfig : public ovms::Config { public: ConstructorEnabledConfig() {} }; + +std::shared_ptr createTensorInfoCopyWithPrecision(std::shared_ptr src, ovms::Precision precision); diff --git a/src/test/tfs_rest_parser_binary_inputs_test.cpp b/src/test/tfs_rest_parser_binary_inputs_test.cpp index cf691842c9..037242800c 100644 --- a/src/test/tfs_rest_parser_binary_inputs_test.cpp +++ b/src/test/tfs_rest_parser_binary_inputs_test.cpp @@ -66,6 +66,126 @@ TEST_F(TFSRestParserBinaryInputs, BatchSize2) { EXPECT_EQ(std::memcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), image_bytes.get(), filesize), 0); } +TEST_F(TFSRestParserBinaryInputs, RowStringMixedPrecision) { + std::string request = R"({"signature_name":"","instances":[{"i": "abcd"}, {"i": 1234}]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::REST_COULD_NOT_PARSE_INSTANCE); +} + +TEST_F(TFSRestParserBinaryInputs, ColumnStringMixedPrecision) { + std::string request = R"({"signature_name":"","inputs":{"i":["abcd", "efg", 52.1, "xyz"]}})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::REST_COULD_NOT_PARSE_INPUT); +} + +TEST_F(TFSRestParserBinaryInputs, ColumnStringMixedPrecision2) { + std::string request = R"({"signature_name":"","inputs":{"i":[[2,3,4],[5,"abcd",7]]}})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::REST_COULD_NOT_PARSE_INPUT); +} + +TEST_F(TFSRestParserBinaryInputs, RowString) { + std::string request = R"({"signature_name":"","instances":[{"i":"abcd"}]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + 
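+    // A plain JSON string (no b64 wrapper) is expected to be kept verbatim and produce a DT_STRING tensor of shape [1].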
ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, RowStringInvalidPrecision) { + std::string request = R"({"signature_name":"","instances":[{"i":"abcd"}]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::FP32)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::REST_COULD_NOT_PARSE_INSTANCE); +} + +TEST_F(TFSRestParserBinaryInputs, RowStringInvalidShape) { + std::string request = R"({"signature_name":"","instances":[{"i":"abcd"}]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, RowStringStaticShape) { + std::string request = R"({"signature_name":"","instances":[{"i":"abcd"}]})"; + + TFSRestParser parser(prepareTensors({{"i", {1, 4}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, ColumnString) { + std::string request = R"({"signature_name":"","inputs":{"i":["abcd"]}})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, ColumnStringUnnamed) { + std::string request = R"({"signature_name":"","inputs":["abcd"]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + 
EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, RowStringUnnamed) { + std::string request = R"({"signature_name":"","instances":["abcd"]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 1); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(1)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); +} + +TEST_F(TFSRestParserBinaryInputs, RowStringBatchSize2) { + std::string request = R"({"signature_name":"","instances":[{"i":"abcd"}, {"i":"efgh"}]})"; + + TFSRestParser parser(prepareTensors({{"i", {-1, -1}}}, ovms ::Precision::U8)); + ASSERT_EQ(parser.parse(request.c_str()), StatusCode::OK); + ASSERT_EQ(parser.getProto().inputs_size(), 1); + ASSERT_EQ(parser.getProto().inputs().count("i"), 1); + ASSERT_EQ(parser.getProto().inputs().find("i")->second.string_val_size(), 2); + EXPECT_THAT(asVector(parser.getProto().inputs().at("i").tensor_shape()), ElementsAre(2)); + ASSERT_EQ(parser.getProto().inputs().at("i").dtype(), tensorflow::DataType::DT_STRING); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(0).c_str(), "abcd"), 0); + EXPECT_EQ(strcmp(parser.getProto().inputs().find("i")->second.string_val(1).c_str(), "efgh"), 0); +} + TEST_F(TFSRestParserBinaryInputs, RowName) { std::string request = R"({"signature_name":"","instances":[{"k":[{"b64":")" + b64encoded + R"("}]}]})"; diff --git a/src/test/tfs_rest_parser_row_test.cpp b/src/test/tfs_rest_parser_row_test.cpp index 56d036f311..28d0655a52 100644 --- a/src/test/tfs_rest_parser_row_test.cpp +++ b/src/test/tfs_rest_parser_row_test.cpp @@ -411,7 +411,6 @@ TEST(TFSRestParserRow, NamedInstanceNotAnObject) { TEST(TFSRestParserRow, CouldNotDetectNamedOrNoNamed) { TFSRestParser parser(prepareTensors({}, ovms::Precision::FP16)); - EXPECT_EQ(parser.parse(R"({"signature_name":"","instances":["1", "2"]})"), StatusCode::REST_INSTANCES_NOT_NAMED_OR_NONAMED); EXPECT_EQ(parser.parse(R"({"signature_name":"","instances":[null, null]})"), StatusCode::REST_INSTANCES_NOT_NAMED_OR_NONAMED); } diff --git a/src/test/threadsafequeue_test.cpp b/src/test/threadsafequeue_test.cpp index 8199075749..947c64da01 100644 --- a/src/test/threadsafequeue_test.cpp +++ b/src/test/threadsafequeue_test.cpp @@ -73,7 +73,7 @@ TEST(TestThreadSafeQueue, NoElementsPushed) { const uint ELEMENTS_TO_INSERT = 500; -void producer(ThreadSafeQueue& queue, std::future startSignal) { +static void producer(ThreadSafeQueue& queue, std::future startSignal) { startSignal.get(); uint counter = 0; while (counter < ELEMENTS_TO_INSERT) { @@ -82,7 +82,7 @@ void producer(ThreadSafeQueue& queue, std::future startSignal) { } } -void consumer(ThreadSafeQueue& queue, std::future startSignal, std::vector& consumed, const uint elementsToPull) { +static void consumer(ThreadSafeQueue& queue, std::future startSignal, std::vector& consumed, const uint elementsToPull) { 
startSignal.get(); uint counter = 0; while (counter < elementsToPull) { diff --git a/src/tfs_frontend/tfs_utils.cpp b/src/tfs_frontend/tfs_utils.cpp index 5cf84b4207..e2b9a908a0 100644 --- a/src/tfs_frontend/tfs_utils.cpp +++ b/src/tfs_frontend/tfs_utils.cpp @@ -147,4 +147,25 @@ bool isNativeFileFormatUsed(const TFSInputTensorType& proto) { return proto.dtype() == TFSDataType::DT_STRING; // return request.string_val_size() > 0; } + +bool requiresPreProcessing(const TFSInputTensorType& proto) { + return proto.dtype() == tensorflow::DataType::DT_STRING; +} + +std::string& createOrGetString(TFSInputTensorType& proto, int index) { + while (proto.string_val_size() <= index) { + proto.add_string_val(); + } + return *proto.mutable_string_val(index); +} + +void setBatchSize(TFSInputTensorType& proto, int64_t batch) { + if (proto.tensor_shape().dim_size() == 0) { + proto.mutable_tensor_shape()->add_dim(); + } + proto.mutable_tensor_shape()->mutable_dim(0)->set_size(batch); +} +void setStringPrecision(TFSInputTensorType& proto) { + proto.set_dtype(TFSDataType::DT_STRING); +} } // namespace ovms diff --git a/src/tfs_frontend/tfs_utils.hpp b/src/tfs_frontend/tfs_utils.hpp index 24c3fadf32..e69d775df3 100644 --- a/src/tfs_frontend/tfs_utils.hpp +++ b/src/tfs_frontend/tfs_utils.hpp @@ -42,4 +42,8 @@ Status prepareConsolidatedTensorImpl(TFSPredictResponse* response, const std::st const std::string& getRequestServableName(const TFSPredictRequest& request); Status isNativeFileFormatUsed(const TFSPredictRequest& request, const std::string& name, bool& isNativeFileFormatUsed); bool isNativeFileFormatUsed(const TFSInputTensorType& request); +bool requiresPreProcessing(const TFSInputTensorType& proto); +std::string& createOrGetString(TFSInputTensorType& proto, int index); +void setBatchSize(TFSInputTensorType& proto, int64_t batch); +void setStringPrecision(TFSInputTensorType& proto); } // namespace ovms diff --git a/third_party/aws/BUILD b/third_party/aws/BUILD index dccc555394..b02fd412b2 100644 --- a/third_party/aws/BUILD +++ b/third_party/aws/BUILD @@ -18,6 +18,14 @@ package( default_visibility = ["//visibility:public"], ) +# TODO: Decide if we want to switch to genrule cmake for all docker compilations +# | sed -E 's:(/sandbox/.+)::g') +#genrule( +# name = "aws_cmake", +# outs = ["build"], +# cmd = 'cd external/awssdk && mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY=s3 -DENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DMINIMIZE_SIZE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFORCE_SHARED_CRT=OFF -DSIMPLE_INSTALL=OFF -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 " .. 
&& make', +#) + cc_library( name = "core", srcs = ["build/aws-cpp-sdk-core/libaws-cpp-sdk-core.a"], @@ -33,7 +41,7 @@ cc_library( hdrs = glob([ "aws-cpp-sdk-s3/include/**/*.h", ]), - strip_include_prefix = "aws-cpp-sdk-s3/include" + strip_include_prefix = "aws-cpp-sdk-s3/include", ) cc_library( @@ -46,5 +54,5 @@ cc_library( hdrs = glob([ "build/.deps/install/include/**/*.*", ]), - strip_include_prefix = "build/.deps/install/include" + strip_include_prefix = "build/.deps/install/include", ) diff --git a/third_party/libevent/BUILD b/third_party/libevent/BUILD index 114ecac833..a0117424f4 100644 --- a/third_party/libevent/BUILD +++ b/third_party/libevent/BUILD @@ -208,7 +208,7 @@ genrule( "export INSTALL_DIR=$$(pwd)/$(@D)/libevent", "export TMP_DIR=$$(mktemp -d -t libevent.XXXXXX)", "mkdir -p $$TMP_DIR", - "cp -R $$(pwd)/external/com_github_libevent_libevent/* $$TMP_DIR", + "cp -R $$(pwd | sed -E 's:(/sandbox/.+)::g')/external/com_github_libevent_libevent/* $$TMP_DIR", "cd $$TMP_DIR", "./autogen.sh", "./configure --prefix=$$INSTALL_DIR CFLAGS=-fPIC CXXFLAGS=-fPIC --enable-shared=no --disable-openssl", diff --git a/third_party/mediapipe_calculators/BUILD b/third_party/mediapipe_calculators/BUILD new file mode 100644 index 0000000000..1ba1c8976d --- /dev/null +++ b/third_party/mediapipe_calculators/BUILD @@ -0,0 +1,114 @@ +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +package( + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "mediapipe_calculators", + srcs = [], + hdrs = [], + visibility = ["//visibility:public"], + #TODO:ENABLE_GPU_WIP:defines = ["MEDIAPIPE_DISABLE_GPU=1"], + deps = [ + #ENABLE_WTIH_OV_CALCULATORS"@mediapipe//mediapipe/calculators/openvino:openvino_converter_calculator", + #ENABLE_WTIH_OV_CALCULATORS"@mediapipe//mediapipe/calculators/openvino:openvino_inference_calculator", + #ENABLE_WTIH_OV_CALCULATORS"@mediapipe//mediapipe/calculators/openvino:openvino_tensors_to_classification_calculator", + #ENABLE_WTIH_OV_CALCULATORS"@mediapipe//mediapipe/calculators/openvino:openvino_tensors_to_detections_calculator", + "@mediapipe//mediapipe/calculators/core:concatenate_vector_calculator", + "@mediapipe//mediapipe/calculators/core:flow_limiter_calculator", + "@mediapipe//mediapipe/calculators/core:previous_loopback_calculator", + "@mediapipe//mediapipe/calculators/core:split_vector_calculator", + "@mediapipe//mediapipe/calculators/core:add_header_calculator", + "@mediapipe//mediapipe/calculators/core:begin_loop_calculator", + "@mediapipe//mediapipe/calculators/core:end_loop_calculator", + "@mediapipe//mediapipe/calculators/core:concatenate_vector_calculator_hdr", + "@mediapipe//mediapipe/calculators/core:concatenate_detection_vector_calculator", + "@mediapipe//mediapipe/calculators/core:concatenate_proto_list_calculator", + "@mediapipe//mediapipe/calculators/core:clip_vector_size_calculator", + #SEGFAULT_ON_GLOG_INIT"@mediapipe//mediapipe/calculators/core:clip_detection_vector_size_calculator", + "@mediapipe//mediapipe/calculators/core:counting_source_calculator", + "@mediapipe//mediapipe/calculators/core:make_pair_calculator", + "@mediapipe//mediapipe/calculators/core:matrix_multiply_calculator", + "@mediapipe//mediapipe/calculators/core:matrix_subtract_calculator", + "@mediapipe//mediapipe/calculators/core:mux_calculator", + "@mediapipe//mediapipe/calculators/core:non_zero_calculator", + "@mediapipe//mediapipe/calculators/core:packet_cloner_calculator", + "@mediapipe//mediapipe/calculators/core:packet_inner_join_calculator", + "@mediapipe//mediapipe/calculators/core:packet_thinner_calculator", + "@mediapipe//mediapipe/calculators/core:pass_through_calculator", + "@mediapipe//mediapipe/calculators/core:round_robin_demux_calculator", + "@mediapipe//mediapipe/calculators/core:immediate_mux_calculator", + "@mediapipe//mediapipe/calculators/core:packet_presence_calculator", + "@mediapipe//mediapipe/calculators/core:string_to_int_calculator", + "@mediapipe//mediapipe/calculators/core:default_side_packet_calculator", + "@mediapipe//mediapipe/calculators/core:packet_resampler_calculator", + "@mediapipe//mediapipe/calculators/core:split_proto_list_calculator", + "@mediapipe//mediapipe/calculators/core:dequantize_byte_array_calculator", + "@mediapipe//mediapipe/calculators/core:quantize_float_vector_calculator", + "@mediapipe//mediapipe/calculators/core:sequence_shift_calculator", + "@mediapipe//mediapipe/calculators/core:gate_calculator", + "@mediapipe//mediapipe/calculators/core:matrix_to_vector_calculator", + "@mediapipe//mediapipe/calculators/core:merge_calculator", + "@mediapipe//mediapipe/calculators/core:stream_to_side_packet_calculator", + "@mediapipe//mediapipe/calculators/core:constant_side_packet_calculator", + "@mediapipe//mediapipe/calculators/core:graph_profile_calculator", + "@mediapipe//mediapipe/calculators/core:get_vector_item_calculator", + 
"@mediapipe//mediapipe/calculators/core:vector_size_calculator", + "@mediapipe//mediapipe/calculators/core:packet_sequencer_calculator", + "@mediapipe//mediapipe/calculators/core:merge_to_vector_calculator", + #TENSORFLOW LITE BELOW + "@mediapipe//mediapipe/calculators/tflite:ssd_anchors_calculator", + "@mediapipe//mediapipe/calculators/util:annotation_overlay_calculator", + "@mediapipe//mediapipe/calculators/util:detection_label_id_to_text_calculator", + "@mediapipe//mediapipe/calculators/util:detections_to_render_data_calculator", + "@mediapipe//mediapipe/calculators/util:non_max_suppression_calculator", + "@mediapipe//mediapipe/calculators/video:opencv_video_decoder_calculator", + "@mediapipe//mediapipe/calculators/video:opencv_video_encoder_calculator", + "@mediapipe//mediapipe/calculators/image:color_convert_calculator", + "@mediapipe//mediapipe/calculators/image:opencv_encoded_image_to_image_frame_calculator", + "@mediapipe//mediapipe/calculators/image:opencv_image_encoder_calculator", + "@mediapipe//mediapipe/calculators/image:opencv_put_text_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:set_alpha_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:bilateral_filter_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:image_transformation_calculator", + "@mediapipe//mediapipe/calculators/image:image_cropping_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:luminance_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:sobel_edges_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:recolor_calculator", + #REQUIRED_libyuv_DEPENDENCY"@mediapipe//mediapipe/calculators/image:scale_image_calculator", + "@mediapipe//mediapipe/calculators/image:image_clone_calculator", + "@mediapipe//mediapipe/calculators/image:image_properties_calculator", + #GPU"@mediapipe//mediapipe/calculators/image:mask_overlay_calculator", + "@mediapipe//mediapipe/calculators/image:feature_detector_calculator", + #REQUIRED_easyexif_DEPENDENCY"@mediapipe//mediapipe/calculators/image:image_file_properties_calculator", + "@mediapipe//mediapipe/calculators/image:segmentation_smoothing_calculator", + "@mediapipe//mediapipe/calculators/image:warp_affine_calculator", + #REQUIRED_libyuv_DEPENDENCY"@mediapipe//mediapipe/calculators/image:yuv_to_image_calculator", + #VISIBILITY_PRIVATE_THUS_SINK_BELOW"@mediapipe//mediapipe/calculators/internal:callback_packet_calculator", + "@mediapipe//mediapipe/framework/tool:sink", + "@mediapipe//mediapipe/calculators/video:flow_to_image_calculator", + "@mediapipe//mediapipe/calculators/video:tvl1_optical_flow_calculator", + "@mediapipe//mediapipe/calculators/video:motion_analysis_calculator", + "@mediapipe//mediapipe/calculators/video:flow_packager_calculator", + "@mediapipe//mediapipe/calculators/video:box_tracker_calculator", + "@mediapipe//mediapipe/calculators/video:box_detector_calculator", + "@mediapipe//mediapipe/calculators/video:tracked_detection_manager_calculator", + "@mediapipe//mediapipe/calculators/video:video_pre_stream_calculator", + ], + linkstatic=1, +) \ No newline at end of file diff --git a/third_party/opencv/BUILD b/third_party/opencv/BUILD index 819e363b60..f92909d436 100644 --- a/third_party/opencv/BUILD +++ b/third_party/opencv/BUILD @@ -22,8 +22,15 @@ cc_library( name = "opencv", srcs = glob([ "lib/libopencv_core.so", + "lib/libopencv_imgproc.so", "lib/libopencv_imgcodecs.so", - "lib/libopencv_imgproc.so" + #MEDIAPIPE CALCULATORS REQUIREMENTS BELOW + "lib/libopencv_highgui.so", + "lib/libopencv_calib3d.so", + 
"lib/libopencv_features2d.so", + "lib/libopencv_optflow.so", + "lib/libopencv_video.so", + "lib/libopencv_videoio.so", ]), hdrs = glob([ "include/**/*.*" diff --git a/third_party/opencv/install_opencv.sh b/third_party/opencv/install_opencv.sh index b5948039c2..3ef420edfe 100755 --- a/third_party/opencv/install_opencv.sh +++ b/third_party/opencv/install_opencv.sh @@ -40,7 +40,7 @@ fi #=================================================================================================== # OpenCV installation -if [ "$os" == "ubuntu20.04" ] ; then +if [ "$os" == "ubuntu20.04" ] || [ "$os" == "ubuntu22.04" ] ; then export DEBIAN_FRONTEND=noninteractive apt update && apt install -y build-essential git cmake \ && rm -rf /var/lib/apt/lists/* @@ -54,7 +54,10 @@ fi current_working_dir=$(pwd) cd $work_dir +rm -rf $opencv_branch $work_dir/opencv_repo +rm -rf $opencv_branch $work_dir/opencv_contrib_repo git clone https://github.com/opencv/opencv.git --depth 1 -b $opencv_branch $work_dir/opencv_repo +git clone https://github.com/opencv/opencv_contrib.git --depth 1 -b $opencv_branch $work_dir/opencv_contrib_repo cd $work_dir/opencv_repo mkdir -p $work_dir/opencv_repo/build cd $work_dir/opencv_repo/build diff --git a/third_party/opencv/opencv_cmake_flags.txt b/third_party/opencv/opencv_cmake_flags.txt index 2668f35545..238381f020 100644 --- a/third_party/opencv/opencv_cmake_flags.txt +++ b/third_party/opencv/opencv_cmake_flags.txt @@ -1,9 +1,41 @@ --D BUILD_LIST=core,improc,imgcodecs \ +-D BUILD_LIST=core,improc,imgcodecs,calib3d,features2d,highgui,imgproc,video,videoio,optflow \ +-D OPENCV_EXTRA_MODULES_PATH=/opt/opencv_contrib_repo/modules \ -D CMAKE_BUILD_TYPE=Release \ -D CMAKE_INSTALL_PREFIX=/opt/opencv \ --D OPENCV_LIB_INSTALL_PATH=lib --D WITH_OPENJPEG=OFF --D WITH_JASPER=OFF --D WITH_OPENEXR=OFF --D WITH_TIFF=OFF - +-D OPENCV_LIB_INSTALL_PATH=lib \ +-D BUILD_TESTS=OFF \ +-D BUILD_PERF_TESTS=OFF \ +-D BUILD_opencv_ts=OFF \ +-D BUILD_opencv_aruco=OFF \ +-D BUILD_opencv_bgsegm=OFF \ +-D BUILD_opencv_bioinspired=OFF \ +-D BUILD_opencv_ccalib=OFF \ +-D BUILD_opencv_datasets=OFF \ +-D BUILD_opencv_dnn=OFF \ +-D BUILD_opencv_dnn_objdetect=OFF \ +-D BUILD_opencv_dpm=OFF \ +-D BUILD_opencv_face=OFF \ +-D BUILD_opencv_fuzzy=OFF \ +-D BUILD_opencv_hfs=OFF \ +-D BUILD_opencv_img_hash=OFF \ +-D BUILD_opencv_js=OFF \ +-D BUILD_opencv_line_descriptor=OFF \ +-D BUILD_opencv_phase_unwrapping=OFF \ +-D BUILD_opencv_plot=OFF \ +-D BUILD_opencv_quality=OFF \ +-D BUILD_opencv_reg=OFF \ +-D BUILD_opencv_rgbd=OFF \ +-D BUILD_opencv_saliency=OFF \ +-D BUILD_opencv_shape=OFF \ +-D BUILD_opencv_structured_light=OFF \ +-D BUILD_opencv_surface_matching=OFF \ +-D BUILD_opencv_world=OFF \ +-D BUILD_opencv_xobjdetect=OFF \ +-D BUILD_opencv_xphoto=OFF \ +-D CV_ENABLE_INTRINSICS=ON \ +-D WITH_EIGEN=ON \ +-D WITH_PTHREADS=ON \ +-D WITH_PTHREADS_PF=ON \ +-D WITH_JPEG=ON \ +-D WITH_PNG=ON \ +-D WITH_TIFF=ON \ No newline at end of file diff --git a/third_party/openvino/BUILD b/third_party/openvino/BUILD index 499f0e55cc..58f0c60d14 100644 --- a/third_party/openvino/BUILD +++ b/third_party/openvino/BUILD @@ -35,7 +35,7 @@ cc_library( strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [ - "@openvino//:openvino_old_headers", + "@linux_openvino//:openvino_old_headers", ] ) @@ -53,11 +53,10 @@ cc_library( srcs = glob([ "lib/intel64/libopenvino.so" ]), - data = [ "lib/intel64/plugins.xml" ], strip_include_prefix = "include/ie", visibility = ["//visibility:public"], deps = [ - "@openvino//:ngraph", - 
"@openvino//:openvino_new_headers", + "@linux_openvino//:ngraph", + "@linux_openvino//:openvino_new_headers", ], ) diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000000..c95abed3b6 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,13 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +"@bazel/typescript@^5.7.1": + version "5.7.2" + resolved "https://registry.yarnpkg.com/@bazel/typescript/-/typescript-5.7.2.tgz#a341215dc93ce28794e8430b311756816140bd78" + integrity sha512-tarBJBEIirnq/YaeYu18vXcDxjzlq4xhCXvXUxA0lhHX5oArjEcAEn4tmO0jF+t/7cbkAdMT7daG6vIHSz0QAA== + dependencies: + "@bazel/worker" "5.7.2" + semver "5.6.0" + source-map-support "0.5.9" + tsutils "3.21.0"