@@ -46,6 +46,13 @@ build --spawn_strategy=standalone
build --genrule_strategy=standalone
build --define=grpc_no_ares=true
+# Pass MEDIAPIPE_DISABLE_GPU=1 to every build, coverage and test invocation.
+# NOTE(review): presumably this selects MediaPipe's CPU-only code paths - confirm
+# against the MediaPipe BUILD files.
+build --define=MEDIAPIPE_DISABLE_GPU=1
+coverage --define=MEDIAPIPE_DISABLE_GPU=1
+test --define=MEDIAPIPE_DISABLE_GPU=1
+# Default MEDIAPIPE_DISABLE to 0 for the same three commands. The Dockerfiles
+# look for MEDIAPIPE_DISABLE=1 inside debug_bazel_flags, so a --define given on
+# the command line is expected to override this default - TODO confirm.
+build --define=MEDIAPIPE_DISABLE=0
+coverage --define=MEDIAPIPE_DISABLE=0
+test --define=MEDIAPIPE_DISABLE=0
# Sets the default Apple platform to macOS.
build --apple_platform_type=macos
@@ -65,6 +72,10 @@ build --cxxopt=-fno-strict-overflow
build --cxxopt=-fno-delete-null-pointer-checks
build --cxxopt=-fwrapv
build --cxxopt=-fstack-protector
+# Extra compiler hardening for C++ sources: stack-clash protection plus
+# format-string diagnostics, with format-security findings promoted to errors.
+build --cxxopt=-fstack-clash-protection
+build --cxxopt=-Wformat
+build --cxxopt=-Wformat-security
+build --cxxopt=-Werror=format-security
# Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF
# compilation options. It also addresses memory use due to
@@ -75,3 +86,5 @@ build --experimental_repo_remote_exec
build --force_pic
build --experimental_cc_shared_library
+build --check_visibility=true
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000000..53072b64f2
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,78 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ "package.json",
+ "yarn.lock",
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@mediapipe//mediapipe/framework:more_selects.bzl", "more_selects")
+ name = "disable_mediapipe",
+ define_values = {
+ },
+ visibility = ["//visibility:public"],
+ name = "not_disable_mediapipe",
+ negate = ":disable_mediapipe",
+ name = "ovms_dependencies",
+ deps = [
+ "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto",
+ "@tensorflow_serving//tensorflow_serving/apis:model_service_cc_proto",
+ "@minitrace//:trace",
+ "@com_github_grpc_grpc//:grpc++",
+ "@org_tensorflow//tensorflow/core:framework",
+ "@com_github_tencent_rapidjson//:rapidjson",
+ "@com_github_gabime_spdlog//:spdlog",
+ "@com_github_jarro2783_cxxopts//:cxxopts",
+ "@awssdk//:s3",
+ "@awssdk//:core",
+ "@awssdk//:deps",
+ "@azure//:storage",
+ "@cpprest//:sdk",
+ "@boost//:lib",
+ "@com_github_googleapis_google_cloud_cpp//google/cloud/storage:storage_client",
+ "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server",
+ "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server_api",
+ "@tensorflow_serving//tensorflow_serving/util:threadpool_executor",
+ "@tensorflow_serving//tensorflow_serving/util:json_tensor",
+ "@linux_openvino//:openvino",
+ "@linux_opencv//:opencv",
+ "@com_github_jupp0r_prometheus_cpp//core",
+ "@oneTBB//:tbb",
+ ] + select({
+ "//conditions:default": [
+ "@mediapipe//mediapipe/framework:calculator_framework",
+ "@mediapipe//mediapipe/framework/port:logging",
+ "@mediapipe//mediapipe/framework/port:parse_text_proto",
+ "@mediapipe//mediapipe/framework/port:status",
+ "@mediapipe_calculators//:mediapipe_calculators",
+ "@model_api//:adapter_api",
+ ],
+ "//:disable_mediapipe" : [],
+ }),
+ visibility = ["//visibility:public"],
@@ -1,5 +1,5 @@
-# Copyright (c) 2020-2021 Intel Corporation
+# Copyright (c) 2020-2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,6 +22,9 @@ LABEL version="1.0.0"
SHELL ["/bin/bash", "-xo", "pipefail", "-c"]
+# Stage the `entitlement` and `rhsm-ca` directories from the build context and
+# drop /etc/rhsm-host before the first dnf/yum call below.
+# NOTE(review): presumably this lets the package manager reach subscription-only
+# RHEL repositories from inside the container - confirm.
+COPY entitlement /etc/pki/entitlement
+COPY rhsm-ca /etc/rhsm/ca
+RUN rm -f /etc/rhsm-host
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && dnf clean all && yum update -d6 -y && yum install -d6 -y \
boost169-atomic \
@@ -127,19 +130,73 @@ RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.n
unzip \
vim \
xz \
- https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \
+ https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/libusb-devel-0.1.5-12.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusb-0.1.5-12.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusbx-devel-1.0.23-4.el8.x86_64.rpm && \
yum clean all
+# Add Nvidia dev tool if needed.
+# The step exits early with success unless NVIDIA=1. Otherwise it installs the
+# CUDA/cuDNN/cuTENSOR packages, then ninja, sccache, CMake 3.24.0 and ccache 4.3
+# (ccache is configured with cmake and installed with ninja).
+# hadolint ignore=DL3003
+RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
+ yum config-manager --save --set-enabled codeready-builder-for-rhel-8-x86_64-rpms && \
+ yum -y module disable python36 && \
+ yum install -y \
+ libzstd-devel \
+ libcudnn8- \
+ libcudnn8-devel- \
+ libcutensor1- \
+ libcutensor-devel- \
+ cuda-cudart-devel-11-8 && \
+ # ignore errors on hosts with older nvidia drivers; the braces keep "|| true"
+ # scoped to this single install so failures of the earlier commands still abort the step
+ { yum install -y cuda-11-8 || true ; } && \
+ yum install -y python38-Cython && \
+ curl -L https://github.com/Kitware/ninja/releases/download/v1.10.0.gfb670.kitware.jobserver-1/ninja-1.10.0.gfb670.kitware.jobserver-1_x86_64-linux-gnu.tar.gz | tar xzv --strip-components=1 -C /usr/local/bin && \
+ curl https://github.com/mozilla/sccache/releases/download/v0.2.15/sccache-v0.2.15-x86_64-unknown-linux-musl.tar.gz -L | tar xvzC /usr/local/bin --strip-components=1 --wildcards '*/sccache' && \
+ chmod a+x /usr/local/bin/sccache && \
+ curl https://github.com/Kitware/CMake/releases/download/v3.24.0/cmake-3.24.0-linux-x86_64.tar.gz -L | tar xzvC /usr/local --exclude={doc,man} --strip-components=1 && \
+ curl -L https://github.com/ccache/ccache/releases/download/v4.3/ccache-4.3.tar.xz | tar xJv && \
+ mkdir -p ccache-4.3/build && cd ccache-4.3/build && \
+ cmake -DCMAKE_BUILD_TYPE=Release -G Ninja .. && \
+ ninja -v install && \
+ rm -rf /var/cache/yum
# build_type=[ opt, dbg ]
ARG build_type=dbg
-ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
ARG minitrace_flags
ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log"
+ARG ov_source_branch=master
+ARG ov_contrib_branch=master
+ARG sentencepiece=0
+ARG ov_source_org=openvinotoolkit
+ARG ov_contrib_org=openvinotoolkit
ARG ov_use_binary=1
ARG TEMP_DIR=/tmp/openvino_installer
+# hadolint ignore=DL3003
+RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive
+################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################
+# Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1
+# hadolint ignore=DL3003
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
+WORKDIR /openvino/build
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; dnf install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm && dnf clean all
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " ..
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make install
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
+ mkdir -p /opt/intel/openvino/extras && \
+ mkdir -p /opt/intel/openvino && \
+ ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \
+ ln -s /usr/local/runtime /opt/intel/openvino && \
+ ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
+ ln -s /opt/intel/openvino /opt/intel/openvino_2023
+################## END OF OPENVINO SOURCE BUILD ######################
################### TAKE OPENVINO FROM A BINARY RELEASE - buildarg ov_use_binary=1 (DEFAULT) ##########
@@ -149,7 +206,40 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
- ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022
+ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023
+# install sample apps including benchmark_app
+RUN yum install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-devel-2.2.2-1.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-2.2.2-1.el8.x86_64.rpm \
+ https://download-ib01.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/j/json-devel-3.6.1-2.el8.x86_64.rpm && \
+ rm -rf /var/cache/yum
+RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi
+#################### END OF OPENVINO BINARY INSTALL
+ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake
+WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions
+RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . --parallel $JOBS
+ENV OPENVINO_BUILD_PATH=/cuda_plugin_build
+ENV OPENVINO_CONTRIB=/openvino_contrib
+# hadolint ignore=DL3003
+RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
+ mkdir "${OPENVINO_BUILD_PATH}" && \
+ cd "${OPENVINO_BUILD_PATH}" && \
+ cmake "${OPENVINO_HOME}" \
+ -DBUILD_arm_plugin=OFF \
+ -DBUILD_java_api=OFF \
+ -DWHEEL_VERSION=2022.1.0 \
+ cmake --build "${OPENVINO_BUILD_PATH}" --target openvino_nvidia_gpu_plugin -j "$JOBS"
# Build OpenVINO Model Server
@@ -174,6 +264,14 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT
RUN yum install -y https://github.com/linux-test-project/lcov/releases/download/v1.16/lcov-1.16-1.noarch.rpm && yum clean all
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
+ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
+# When debug_bazel_flags contains MEDIAPIPE_DISABLE=1, rewrite WORKSPACE in place:
+# swap the literal version string 3.19.1 for 3.9.2 and replace the accompanying hash.
+# NOTE(review): presumably this pins an older dependency release - confirm which one.
+# The dots in the version pattern are escaped so sed matches only the literal
+# string and not unrelated text (e.g. a run of hex digits inside a checksum).
+RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \
+ sed -i -e 's|3\.19\.1|3.9.2|g' WORKSPACE && \
+ sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE
# hadolint ignore=DL3059
RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework
@@ -181,7 +279,13 @@ RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/co
# hadolint ignore=SC2046
RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch
-RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto
+# Mediapipe
+COPY BUILD.bazel /ovms/
+COPY yarn.lock /ovms/
+COPY package.json /ovms/
+# prebuild dependencies before copying sources
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies
# hadolint ignore=DL3059
RUN cp -v /etc/ssl/certs/ca-bundle.crt /etc/ssl/certs/ca-certificates.crt
@@ -199,7 +303,7 @@ RUN make
LABEL description=${PROJECT_NAME}
@@ -207,43 +311,44 @@ LABEL description=${PROJECT_NAME}
RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/version.hpp"
RUN if [ "$build_type" = "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ;
RUN if [ "$ov_use_binary" = "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp
# Test Coverage
-COPY check_coverage.bat /ovms/
+COPY ci/check_coverage.bat /ovms/
-RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
- bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
+RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
+ bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
# C api shared library
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:ovms_shared
# C api app with bazel
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example
# C-API benchmark app
-RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
-RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so
# C-api C/C++ app with gcc
-COPY MakefileCapi .
-RUN make -f MakefileCapi cpp && make -f MakefileCapi c
+COPY MakefileCapi /ovms/
+RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \
+ make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
ARG ovms_metadata_file
COPY ${ovms_metadata_file} metadata.json
-RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ;
RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms
COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/
COPY release_files/LICENSE /ovms/release_files/LICENSE
COPY client /client
COPY demos /demos
+# Remove the entitlement data and RHSM CA that were copied in at the top of this
+# Dockerfile. The entitlement directory was copied to /etc/pki/entitlement, so
+# that is the path that has to be deleted here.
+RUN rm -Rf /etc/pki/entitlement /etc/rhsm/ca
@@ -24,14 +24,10 @@ SHELL ["/bin/bash", "-xo", "pipefail", "-c"]
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install --no-install-recommends -y \
- libboost-atomic1.71.0 \
- libboost-chrono1.71.0 \
- libboost-filesystem1.71.0 \
- libboost-program-options1.71.0 \
- libboost-thread1.71.0 \
- libboost-system1.71.0 \
- libboost-date-time1.71.0 \
build-essential \
+ gcc-9 \
+ g++-9 \
+ make \
cmake \
automake \
autoconf \
@@ -45,6 +41,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
pkg-config \
wget \
zlib1g-dev && \
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9 && \
+ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -52,7 +50,9 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
WORKDIR /boost
# hadolint ignore=DL3003
RUN wget -nv https://sourceforge.net/projects/boost/files/boost/1.69.0/boost_1_69_0.tar.gz && \
-tar xvf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \
+tar xf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \
+sed -i -e 's|#if PTHREAD_STACK_MIN > 0|#ifdef PTHREAD_STACK_MIN|g' boost/thread/pthread/thread_data.hpp && \
+# fix for compiler >=9.5 https://github.com/boostorg/thread/pull/297/files
./b2 -j ${JOBS} cxxstd=17 link=static cxxflags='-fPIC' cflags='-fPIC' \
--with-chrono --with-date_time --with-filesystem --with-program_options --with-system \
--with-random --with-thread --with-atomic --with-regex \
@@ -73,13 +73,14 @@ WORKDIR /azure/cpprestsdk/Release/build.release
WORKDIR /azure/azure-storage-cpp/Microsoft.WindowsAzure.Storage/build.release
+RUN CASABLANCA_DIR=/azure/cpprestsdk cmake .. -DCMAKE_CXX_FLAGS="-fPIC -Wno-error=deprecated-declarations" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DCMAKE_VERBOSE_MAKEFILE=ON && make --jobs=$JOBS && make --jobs=$JOBS install
+# no-error flag related to https://github.com/aws/aws-sdk-cpp/issues/1582
####### End of Azure SDK
# Build AWS S3 SDK
RUN git clone https://github.com/aws/aws-sdk-cpp.git --branch 1.7.129 --single-branch --depth 1 /awssdk
WORKDIR /awssdk/build
####### End of AWS S3 SDK
@@ -109,23 +110,24 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
nlohmann-json3-dev \
python2 \
python2-dev \
- python-setuptools \
- python3 \
- python3-pip \
- python3-dev \
- python-is-python3 \
- python3-setuptools \
- python3-virtualenv \
- python3-numpy \
- python-is-python3 \
unzip \
vim \
xz-utils && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
+# Install Python 3.8 (interpreter, headers, distutils) from the deadsnakes PPA,
+# register it as the python3 alternative, and install numpy 1.21.0 with pip.
+RUN apt-get update && apt-get install -y software-properties-common gpg gpg-agent --no-install-recommends && \
+ add-apt-repository ppa:deadsnakes/ppa && \
+ apt-get install -y python3.8 python3.8-dev python3.8-distutils python3-pip --no-install-recommends && \
+ apt-get clean && rm -rf /var/lib/apt/lists/* && \
+ update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 && \
+ python3 -m pip install numpy==1.21.0 --no-cache-dir
ARG ov_use_binary=1
+ARG sentencepiece=0
+ARG ov_source_org=openvinotoolkit
+ARG ov_contrib_org=openvinotoolkit
ARG ov_source_branch=master
ARG ov_contrib_branch=master
@@ -134,17 +136,18 @@ ARG CMAKE_BUILD_TYPE=Release
# build_type=[ opt, dbg ]
ARG build_type=opt
-ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
ARG minitrace_flags
ENV HDDL_INSTALL_DIR=/opt/intel/openvino/deployment_tools/inference_engine/external/hddl
ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log"
+# hadolint ignore=DL3003
+RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive
################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################
# Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1
# hadolint ignore=DL3003
-RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/openvinotoolkit/openvino /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
WORKDIR /openvino/build
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " ..
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS
@@ -155,7 +158,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
ln -s /openvino/inference-engine/temp/opencv_*_ubuntu20/opencv /opt/intel/openvino/extras && \
ln -s /usr/local/runtime /opt/intel/openvino && \
ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
- ln -s /opt/intel/openvino /opt/intel/openvino_2022
+ ln -s /opt/intel/openvino /opt/intel/openvino_2023
################## END OF OPENVINO SOURCE BUILD ######################
ARG TEMP_DIR=/tmp/openvino_installer
@@ -168,7 +171,7 @@ WORKDIR $TEMP_DIR
# chmod 755 l_openvino_toolkit_* && \
# ./l_openvino_toolkit_* -a -s --eula accept && \
# rm -rf /opt/intel/openvino && \
-# ln -s /opt/intel/openvino_2022 /opt/intel/openvino
+# ln -s /opt/intel/openvino_2023 /opt/intel/openvino
# OV toolkit package
RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; else exit 0 ; fi ; \
@@ -176,7 +179,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
- ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022
+ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023
# apt package
@@ -188,18 +191,27 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" = "" ] ; then true ;
apt-get update && \
apt-get install --no-install-recommends -y $APT_OV_PACKAGE && \
rm -rf /var/lib/apt/lists/* && \
- ln -s /opt/intel/openvino_2022 /opt/intel/openvino
+ ln -s /opt/intel/openvino_2023 /opt/intel/openvino
+# Download the oneTBB 2021.9.0 Linux release and copy its libtbb shared objects
+# into the OpenVINO runtime library directory.
+# NOTE(review): the tarball and the extracted tree stay in the working directory
+# afterwards - confirm whether a later step needs them or they can be removed.
+RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/oneapi-tbb-2021.9.0-lin.tgz && \
+ tar -xzf oneapi-tbb-2021.9.0-lin.tgz && \
+ cp oneapi-tbb-2021.9.0/lib/intel64/gcc4.8/libtbb.so* /opt/intel/openvino/runtime/lib/intel64/
# install sample apps including benchmark_app
RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi
+ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake
+WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions
+RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . --parallel $JOBS
ENV OPENVINO_BUILD_PATH=/cuda_plugin_build
ENV OPENVINO_CONTRIB=/openvino_contrib
# Add Nvidia dev tool if needed
# hadolint ignore=DL3003
RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
@@ -223,7 +235,6 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
# hadolint ignore=DL3003
RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
- git clone https://github.com/openvinotoolkit/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive && \
cmake "${OPENVINO_HOME}" \
@@ -231,6 +242,10 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
-DBUILD_arm_plugin=OFF \
-DBUILD_java_api=OFF \
+ -DBUILD_custom_operations=OFF \
+ -DBUILD_mo_pytorch=OFF \
+ -DBUILD_optimum=OFF \
+ -DBUILD_ovms_ai_extension=OFF \
-DWHEEL_VERSION=2022.1.0 \
@@ -250,7 +265,13 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT
COPY .bazelrc WORKSPACE /ovms/
COPY external /ovms/external/
-COPY MakefileCapi /ovms/
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/
+ARG debug_bazel_flags=--strip=never\ --copt="-g "\ -c\ dbg
+# When debug_bazel_flags contains MEDIAPIPE_DISABLE=1, rewrite WORKSPACE in place:
+# swap the literal version string 3.19.1 for 3.9.2 and replace the accompanying hash.
+# NOTE(review): presumably this pins an older dependency release - confirm which one.
+# The dots in the version pattern are escaped so sed matches only the literal
+# string and not unrelated text (e.g. a run of hex digits inside a checksum).
+RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \
+ sed -i -e 's|3\.19\.1|3.9.2|g' WORKSPACE && \
+ sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE
RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework
@@ -262,7 +283,13 @@ COPY third_party /ovms/third_party/
# hadolint ignore=SC2046
RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch
-RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto
+# Mediapipe
+COPY BUILD.bazel /ovms/
+COPY yarn.lock /ovms/
+COPY package.json /ovms/
+# prebuild dependencies before copying sources
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies
# Copy example clients into build image for static analysis
WORKDIR /example_cpp_client
@@ -286,38 +313,40 @@ RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/vers
RUN if [ "$build_type" == "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ;
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
# Test Coverage
-COPY check_coverage.bat /ovms/
+COPY ci/check_coverage.bat /ovms/
-RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
- bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
+RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then \
+ bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
+ bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
# C api shared library
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:ovms_shared
# C api app with bazel
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example
# C-API benchmark app
-RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
-RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so
# C-api C/C++ app with gcc
-RUN make -f MakefileCapi cpp && make -f MakefileCapi c
+COPY MakefileCapi /ovms/
+RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \
+ make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
ARG ovms_metadata_file
COPY ${ovms_metadata_file} metadata.json
-RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ;
RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms
COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/
@@ -21,6 +21,7 @@ FROM $BUILD_IMAGE
ARG BASE_OS=ubuntu
ARG ov_use_binary=1
+ARG sentencepiece=0
SHELL ["/bin/bash", "-c"]
RUN mkdir /patchelf && cd /patchelf && \
@@ -28,28 +29,30 @@ RUN mkdir /patchelf && cd /patchelf && \
tar -xf 0.10.tar.gz && ls -lah && cd */ && \
./bootstrap.sh && ./configure && make && make install
-RUN mkdir -vp /ovms_release/bin
-RUN mkdir -vp /ovms_release/deps
-RUN mkdir -vp /ovms_release/lib
-RUN mkdir -vp /ovms_release/lib/hddl/config
-RUN mkdir -vp /ovms_release/lib/custom_nodes
+# Create the release directory skeleton (bin, deps, lib and its sub-directories) in one call.
+RUN mkdir -vp \
+ /ovms_release/bin \
+ /ovms_release/deps \
+ /ovms_release/lib \
+ /ovms_release/lib/hddl/config \
+ /ovms_release/lib/custom_nodes
RUN if [ -d /ovms/src/custom_nodes/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/
+RUN if [ -d /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/
RUN cp /ovms/metadata.json /ovms_release/
+RUN if [ "$sentencepiece" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino_contrib/modules/custom_operations/user_ie_extensions/user_ie_extensions/libuser_ov_extensions.so /ovms_release/lib/
RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/libopenvino_nvidia_gpu_plugin.so /ovms_release/lib/
-RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/plugins.xml /ovms_release/lib/
+RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; echo '' > /ovms_release/lib/plugins.xml
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/3rdparty/hddl/config/* /ovms_release/lib/hddl/config/ || true
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -vr /opt/intel/openvino/runtime/3rdparty/hddl/etc/* /ovms_release/lib/hddl/etc/ || true
-RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/lib/intel64/plugins.xml /ovms_release/lib/ && cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/
+RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; rm -vrf /ovms_release/deps/*-devel-*
RUN find /ovms/bazel-out/k8-*/bin -iname '*.so*' -exec cp -v {} /ovms_release/lib/ \;
RUN cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5
RUN cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10
-RUN rm -f /ovms_release/lib/libssl.so
-RUN rm -f /ovms_release/lib/libsampleloader*
-RUN rm -f /ovms_release/lib/lib_node*
-RUN rm -f /ovms_release/lib/libcustom_node*
+# Drop libraries that must not ship in the release: libssl, the sample loader and the node libraries.
+RUN rm -f \
+ /ovms_release/lib/libssl.so \
+ /ovms_release/lib/libsampleloader* \
+ /ovms_release/lib/lib_node* \
+ /ovms_release/lib/libcustom_node*
# Remove coverage libraries
RUN if [ -f /ovms_release/lib/libjava.so ] ; then true ; else exit 0 ; fi ;cd /ovms_release/lib/ &&\
@@ -78,21 +81,24 @@ RUN if [ "$BASE_OS" == "redhat" ] ; then true ; else exit 0 ; fi ; cp /usr/lib64
RUN find /ovms/bazel-bin/src -name 'ovms' -type f -exec cp -v {} /ovms_release/bin \;
WORKDIR /ovms_release/bin
RUN patchelf --remove-rpath ./ovms && patchelf --set-rpath '$ORIGIN/../lib/' ./ovms
-RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \;
-RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \;
+RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \; && \
+ find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \;
-RUN cp -v /ovms/release_files/LICENSE /ovms_release/
-RUN cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/
-RUN mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include
+RUN cp -v /ovms/release_files/LICENSE /ovms_release/ && \
+ cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/ && \
+ mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include && \
+ ls -lahR /ovms_release/
-RUN ls -lahR /ovms_release/
-RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \;
-RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \;
+RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \; && \
+ find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \;
-RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json
-RUN tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json
+RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && \
+ sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && \
+ cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json && \
+ tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && \
+ sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && \
+ cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json
@@ -44,12 +44,18 @@ CHECK_COVERAGE ?=0
# NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand:
# - uncomment source build section, comment binary section
# - adjust binary version path - version variable is not passed to WORKSPACE file!
+OV_SOURCE_BRANCH ?= releases/2023/0
+OV_CONTRIB_BRANCH ?= releases/2023/0
+OV_SOURCE_ORG ?= openvinotoolkit
+OV_CONTRIB_ORG ?= openvinotoolkit
APT_OV_PACKAGE ?= openvino-2022.1.0
@@ -58,10 +64,15 @@ BAZEL_BUILD_TYPE ?= opt
ifeq ($(BAZEL_BUILD_TYPE),dbg)
- BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg "
+ BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg "$(DISABLE_MEDIAPIPE_PARAMS)
- BAZEL_DEBUG_FLAGS=" --strip=never "
ifeq ($(MINITRACE),ON)
@@ -79,18 +90,31 @@ ifeq ($(BASE_OS),ubuntu)
ifeq ($(NVIDIA),1)
+ endif
+ ifeq ($(BASE_OS_TAG_UBUNTU),20.04)
+ INSTALL_DRIVER_VERSION ?= "22.43.24595"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu20_2023.0.0.10926.b4452d56304_x86_64.tgz
+ else ifeq ($(BASE_OS_TAG_UBUNTU),22.04)
+ INSTALL_DRIVER_VERSION ?= "23.13.26032"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz
- INSTALL_DRIVER_VERSION ?= "22.35.24055"
- DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_ubuntu20_2022.3.0.9052.9752fafe8eb_x86_64.tgz
ifeq ($(BASE_OS),redhat)
- BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT)
+ ifeq ($(NVIDIA),1)
+ BASE_IMAGE=docker.io/nvidia/cuda:11.8.0-runtime-ubi8
+ else
+ BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT)
+ BASE_IMAGE_RELEASE=registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT)
+ endif
- INSTALL_DRIVER_VERSION ?= "22.28.23726"
- DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_rhel8_2022.3.0.9052.9752fafe8eb_x86_64.tgz
+ INSTALL_DRIVER_VERSION ?= "22.43.24595"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_rhel8_2023.0.0.10926.b4452d56304_x86_64.tgz
OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server
@@ -104,7 +128,7 @@ ifeq ($(NVIDIA),1)
PRODUCT_NAME = "OpenVINO Model Server"
-PRODUCT_VERSION ?= "2022.3.0.1"
+PRODUCT_VERSION ?= "2023.0.0"
OVMS_CPP_CONTAINTER_NAME ?= server-test$(shell date +%Y-%m-%d-%H.%M.%S)
@@ -131,28 +155,22 @@ $(ACTIVATE):
cppclean: venv
@echo "Checking cppclean..."
- @. $(ACTIVATE); bash -c "./cppclean.sh"
+ @. $(ACTIVATE); bash -c "./ci/cppclean.sh"
style: venv clang-format-check cpplint cppclean
-sdl-check: venv
@echo "Checking SDL requirements..."
@echo "Checking docker files..."
@echo "Checking python files..."
- @. $(ACTIVATE); bash -c "bandit -x demos/benchmark/python -r demos/*/python > bandit.txt"
- @if ! grep -FRq "No issues identified." bandit.txt; then\
- error Run bandit on demos/*/python/*.py to fix issues.;\
- fi
- @rm bandit.txt
- @. $(ACTIVATE); bash -c "bandit -r client/python/ > bandit2.txt"
- @if ! grep -FRq "No issues identified." bandit2.txt; then\
- error Run bandit on client/python/ to fix issues.;\
- fi
- @rm bandit2.txt
+ @. $(ACTIVATE); bash -c "./ci/bandit.sh"
@echo "Checking license headers in files..."
- @. $(ACTIVATE); bash -c "python3 lib_search.py . > missing_headers.txt"
+ @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . > missing_headers.txt"
@if ! grep -FRq "All files have headers" missing_headers.txt; then\
echo "Files with missing headers";\
cat missing_headers.txt;\
@@ -160,10 +178,12 @@ sdl-check: venv
@rm missing_headers.txt
+sdl-check: venv hadolint bandit license-headers
@echo "Checking forbidden functions in files..."
- @. $(ACTIVATE); bash -c "python3 lib_search.py . functions > forbidden_functions.txt"
+ @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . functions > forbidden_functions.txt"
@if ! grep -FRq "All files checked for forbidden functions" forbidden_functions.txt; then\
- error Run python3 lib_search.py . functions - to see forbidden functions file list.;\
+ error Run python3 ./ci/lib_search.py . functions - to see forbidden functions file list.;\
@rm forbidden_functions.txt
@@ -192,25 +212,39 @@ ifeq ($(NVIDIA),1)
ifeq ($(OV_USE_BINARY),1)
@echo "Building NVIDIA plugin requires OV built from source. To build NVIDIA plugin and OV from source make command should look like this 'NVIDIA=1 OV_USE_BINARY=0 make docker_build'"; exit 1 ;
+ ifeq ($(BASE_OS),redhat)
+ @echo "copying RH entitlements"
+ @cp -ru /etc/pki/entitlement .
+ @mkdir -p rhsm-ca # -p keeps repeated builds from failing when the directory already exists
+ @cp -u /etc/rhsm/ca/* rhsm-ca/
+ endif
-ifeq ($(BUILD_CUSTOM_NODES),true)
- @echo "Building custom nodes"
- @cd src/custom_nodes && make BASE_OS=$(BASE_OS)
+ifeq ($(BASE_OS),redhat)
+ @mkdir -p entitlement
+ @mkdir -p rhsm-ca
- @echo "Building docker image $(BASE_OS)"
- # Provide metadata information into image if defined
- @mkdir -p .workspace
- @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)'
- @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})'
ifeq ($(NO_DOCKER_CACHE),true)
$(eval NO_CACHE_OPTION:=--no-cache)
@echo "Docker image will be rebuilt from scratch"
@docker pull $(BASE_IMAGE)
ifeq ($(BASE_OS),redhat)
@docker pull registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT)
+ ifeq ($(NVIDIA),1)
+ @docker pull docker.io/nvidia/cuda:11.8.0-runtime-ubi8
+ endif
+ifeq ($(BUILD_CUSTOM_NODES),true)
+ @echo "Building custom nodes"
+ @cd src/custom_nodes/tokenizer && make NO_DOCKER_CACHE=$(NO_DOCKER_CACHE) BASE_OS=$(BASE_OS) BASE_IMAGE=$(BASE_IMAGE)
+ @echo "Building docker image $(BASE_OS)"
+ # Provide metadata information into image if defined
+ @mkdir -p .workspace
+ @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)'
+ @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})'
@cp $(OVMS_METADATA_FILE) .workspace/metadata.json
@@ -219,8 +253,9 @@ endif
@cat .workspace/metadata.json
docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . \
--build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy=$(HTTPS_PROXY) --build-arg no_proxy=$(NO_PROXY) \
- --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" \
- --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \
+ --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" --build-arg ov_source_org="$(OV_SOURCE_ORG)" \
+ --build-arg ov_contrib_org="$(OV_CONTRIB_ORG)" \
+ --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \
--build-arg build_type=$(BAZEL_BUILD_TYPE) --build-arg debug_bazel_flags=$(BAZEL_DEBUG_FLAGS) \
@@ -235,7 +270,7 @@ endif
targz_package: ovms_builder_image
docker build $(NO_CACHE_OPTION) -f DockerfileMakePackage . \
--build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy="$(HTTPS_PROXY)" \
- --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg BASE_OS=$(BASE_OS) \
+ --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg BASE_OS=$(BASE_OS) \
--build-arg NVIDIA=$(NVIDIA) \
@@ -253,7 +288,7 @@ ovms_release_image: targz_package
--build-arg no_proxy=$(NO_PROXY) \
--build-arg GPU=0 \
- --build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg NVIDIA=$(NVIDIA) \
cd dist/$(DIST_OS)/ && docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . \
@@ -262,7 +297,7 @@ ovms_release_image: targz_package
--build-arg GPU=1 \
- --build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg NVIDIA=$(NVIDIA) \
@@ -434,6 +469,7 @@ cpu_extension:
--build-arg https_proxy=${https_proxy} \
--build-arg no_proxy=${no_proxy} \
- --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} .
+ --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} \
+ --build-arg BASE_IMAGE=${BASE_IMAGE} .
mkdir -p ./lib/${BASE_OS}
docker cp $$(docker create --rm sample_cpu_extension:latest):/workspace/libcustom_relu_cpu_extension.so ./lib/${BASE_OS}
@@ -13,13 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
- bazel build //src:ovms_shared
+ bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared
g++ src/main_capi.cpp -I/ovms/src/ -L/ovms/bazel-bin/src/ -lovms_shared -fPIC --std=c++17 -o /ovms/bazel-bin/src/capi_cpp_example
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_cpp_example
- bazel build //src:ovms_shared
+ bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared
gcc -c src/main_capi.c -o /ovms/bazel-bin/src/main_capi.o -std=c99
gcc -o /ovms/bazel-bin/src/capi_c_example /ovms/bazel-bin/src/main_capi.o -lovms_shared -L/ovms/bazel-bin/src/
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_c_example
@@ -1,33 +1,40 @@
# OpenVINO™ Model Server
+Model Server hosts models and makes them accessible to software components over standard network protocols: a client sends a request to the model server, which performs model inference and sends a response back to the client. Model Server offers many advantages for efficient model deployment:
+- Remote inference enables using lightweight clients with only the necessary functions to perform API calls to edge or cloud deployments.
+- Applications are independent of the model framework, hardware device, and infrastructure.
+- Client applications in any programming language that supports REST or gRPC calls can be used to run inference remotely on the model server.
+- Clients require fewer updates since client libraries change very rarely.
+- Model topology and weights are not exposed directly to client applications, making it easier to control access to the model.
+- Ideal architecture for microservices-based applications and deployments in cloud environments – including Kubernetes and OpenShift clusters.
+- Efficient resource utilization with horizontal and vertical inference scaling.
-OpenVINO™ Model Server (OVMS) is a high-performance system for serving machine learning models. It is based on C++ for high scalability
-and optimized for Intel solutions, so that you can take advantage of all the power of the Intel® Xeon® processor or Intel’s AI accelerators
-and expose it over a network interface. OVMS uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving),
-while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making it easy to deploy new algorithms and AI experiments.
-Model repositories may reside on a locally accessible file system (e.g. NFS), as well as online storage compatible with
-Google Cloud Storage (GCS), Amazon S3, or Azure Blob Storage.
+OpenVINO™ Model Server (OVMS) is a high-performance system for serving models. Implemented in C++ for scalability and optimized for deployment on Intel architectures, the model server uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving) and [KServe](https://github.com/kserve/kserve) while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making deploying new algorithms and AI experiments easy.
+The models used by the server need to be stored locally or hosted remotely by object storage services. For more details, refer to the [Preparing Model Repository](docs/models_repository.md) documentation. The model server works inside [Docker containers](docs/deploying_server.md), on [Bare Metal](docs/deploying_server.md), and in a [Kubernetes environment](docs/deploying_server.md).
+Start using OpenVINO Model Server with a fast-forward serving example from the [Quickstart guide](docs/ovms_quickstart.md) or explore [Model Server features](docs/features.md).
Read [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what’s new.
Key features:
- support for multiple frameworks, such as Caffe, TensorFlow, MXNet, PaddlePaddle and ONNX
-- online deployment of new [model versions](https://docs.openvino.ai/2022.2/ovms_docs_model_version_policy.html)
-- [configuration updates in runtime](https://docs.openvino.ai/2022.2/ovms_docs_online_config_changes.html)
+- online deployment of new [model versions](https://docs.openvino.ai/2023.0/ovms_docs_model_version_policy.html)
+- [configuration updates in runtime](https://docs.openvino.ai/2023.0/ovms_docs_online_config_changes.html)
- support for AI accelerators, such as
-[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html),
-[GPU](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html), and
-- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html)
-- [model reshaping](https://docs.openvino.ai/2022.2/ovms_docs_shape_batch_layout.html) in runtime
-- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead
-- [custom nodes in DAG pipelines](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library
-- [serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests
-- [binary format of the input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications
-- [model caching](https://docs.openvino.ai/2022.2/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads
-- [metrics](https://docs.openvino.ai/2022.2/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
+[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_MYRIAD.html),
+[GPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html), and
+- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html)
+- [model reshaping](https://docs.openvino.ai/2023.0/ovms_docs_shape_batch_layout.html) in runtime
+- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead
+- [custom nodes in DAG pipelines](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library
+- [serving stateful models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests
+- [binary format of the input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications
+- [model caching](https://docs.openvino.ai/2023.0/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads
+- [metrics](https://docs.openvino.ai/2023.0/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
**Note:** OVMS has been tested on RedHat and Ubuntu. The latest publicly released docker images are based on Ubuntu and UBI.
@@ -38,26 +45,26 @@ They are stored in:
## Run OpenVINO Model Server
-A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2022.2/ovms_docs_quick_start_guide.html).
+A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2023.0/ovms_docs_quick_start_guide.html).
For more information on using Model Server in various scenarios you can check the following guides:
-* [Model repository configuration](https://docs.openvino.ai/2022.2/ovms_docs_models_repository.html)
+* [Model repository configuration](https://docs.openvino.ai/2023.0/ovms_docs_models_repository.html)
-* [Deployment options](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html)
+* [Deployment options](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html)
-* [Performance tuning](https://docs.openvino.ai/2022.2/ovms_docs_performance_tuning.html)
+* [Performance tuning](https://docs.openvino.ai/2023.0/ovms_docs_performance_tuning.html)
-* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html)
+* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html)
-* [Custom nodes development](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html)
+* [Custom nodes development](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html)
-* [Serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html)
+* [Serving stateful models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html)
* [Deploy using a Kubernetes Helm Chart](https://github.com/openvinotoolkit/operator/tree/main/helm-charts/ovms)
* [Deployment using Kubernetes Operator](https://operatorhub.io/operator/ovms-operator)
-* [Using binary input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html)
+* [Using binary input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html)
@@ -71,7 +78,7 @@ For more information on using Model Server in various scenarios you can check th
* [RESTful API](https://restfulapi.net/)
-* [Benchmarking results](https://docs.openvino.ai/2022.1/openvino_docs_performance_benchmarks_ovms.html)
+* [Benchmarking results](https://docs.openvino.ai/2023.0/openvino_docs_performance_benchmarks_ovms.html)
* [Speed and Scale AI Inference Operations Across Multiple Architectures](https://techdecoded.intel.io/essentials/speed-and-scale-ai-inference-operations-across-multiple-architectures/?elq_cid=3646480_ts1607680426276&erpm_id=6470692_ts1607680426276) - webinar recording
@@ -70,6 +70,109 @@ git_repository(
# allow all http methods
+########################################################### Mediapipe
+ name = "com_google_protobuf",
+ sha256 = "87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422",
+ strip_prefix = "protobuf-3.19.1",
+ urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.1.tar.gz"],
+ #patches = [
+ # "@//third_party:com_google_protobuf_fixes.diff"
+ #],
+ #patch_args = [
+ # "-p1",
+ #],
+################################### Official mediapipe repository #########
+#### Will be used on feature release
+ name = "mediapipe",
+ remote = "https://github.com/google/mediapipe",
+ tag = "v0.9.1",
+# DEV mediapipe 1 source - adjust local repository path for build
+# name = "mediapipe",
+# path = "/mediapipe/",
+# Protobuf for Node dependencies
+ name = "rules_proto_grpc",
+ sha256 = "bbe4db93499f5c9414926e46f9e35016999a4e9f6e3522482d3760dc61011070",
+ strip_prefix = "rules_proto_grpc-4.2.0",
+ urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/archive/4.2.0.tar.gz"],
+# Node dependencies
+ name = "build_bazel_rules_nodejs",
+ sha256 = "5aae76dced38f784b58d9776e4ab12278bc156a9ed2b1d9fcd3e39921dc88fda",
+ urls = ["https://github.com/bazelbuild/rules_nodejs/releases/download/5.7.1/rules_nodejs-5.7.1.tar.gz"],
+load("@build_bazel_rules_nodejs//:repositories.bzl", "build_bazel_rules_nodejs_dependencies")
+# fetches nodejs, npm, and yarn
+load("@build_bazel_rules_nodejs//:index.bzl", "node_repositories", "yarn_install")
+ name = "npm",
+ package_json = "//:package.json",
+ yarn_lock = "//:yarn.lock",
+ name = "com_google_protobuf_javascript",
+ sha256 = "35bca1729532b0a77280bf28ab5937438e3dcccd6b31a282d9ae84c896b6f6e3",
+ strip_prefix = "protobuf-javascript-3.21.2",
+ urls = ["https://github.com/protocolbuffers/protobuf-javascript/archive/refs/tags/v3.21.2.tar.gz"],
+ name = "rules_foreign_cc",
+ strip_prefix = "rules_foreign_cc-0.1.0",
+ url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.1.0.zip",
+load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies")
+# gflags needed by glog
+ name = "com_github_gflags_gflags",
+ strip_prefix = "gflags-2.2.2",
+ sha256 = "19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5",
+ url = "https://github.com/gflags/gflags/archive/v2.2.2.zip",
+ name = "com_github_glog_glog",
+ remote = "https://github.com/google/glog",
+ tag = "v0.5.0",
+load("@mediapipe//third_party:external_files.bzl", "external_files")
+ name = "linux_openvino",
+ build_file = "@//third_party/openvino:BUILD",
+ path = "/opt/intel/openvino/runtime",
+ name = "linux_opencv",
+ build_file = "@//third_party/opencv:BUILD",
+ path = "/opt/opencv/",
+########################################################### Mediapipe end
# minitrace
name = "minitrace",
@@ -179,7 +282,7 @@ grpc_extra_deps()
# cxxopts
- name = "cxxopts",
+ name = "com_github_jarro2783_cxxopts",
url = "https://github.com/jarro2783/cxxopts/archive/v2.2.0.zip",
sha256 = "f9640c00d9938bedb291a21f9287902a3a8cee38db6910b905f8eba4a6416204",
strip_prefix = "cxxopts-2.2.0",
@@ -188,7 +291,7 @@ http_archive(
# RapidJSON
- name = "rapidjson",
+ name = "com_github_tencent_rapidjson",
url = "https://github.com/Tencent/rapidjson/archive/v1.1.0.zip",
sha256 = "8e00c38829d6785a2dfb951bb87c6974fa07dfe488aa5b25deec4b8bc0f6a3ab",
strip_prefix = "rapidjson-1.1.0",
@@ -197,7 +300,7 @@ http_archive(
# spdlog
- name = "spdlog",
+ name = "com_github_gabime_spdlog",
url = "https://github.com/gabime/spdlog/archive/v1.4.0.tar.gz",
sha256 = "afd18f62d1bc466c60bef088e6b637b0284be88c515cedc59ad4554150af6043",
strip_prefix = "spdlog-1.4.0",
@@ -248,3 +351,32 @@ new_local_repository(
path = "/opt/opencv",
################## END OF OPENCV DEPENDENCY ##########
+ name = "model_api",
+ remote = "https://github.com/openvinotoolkit/model_api/",  # exactly two slashes after the scheme; three leaves the host empty
+ build_file_content = """
+ name = "adapter_api",
+ hdrs = ["model_api/cpp/adapters/include/adapters/inference_adapter.h",],
+ includes = ["model_api/cpp/adapters/include"],
+ deps = ["@linux_openvino//:openvino"],
+ visibility = ["//visibility:public"],
+ """,
+ commit = "7e163416c60ba9ccdf440c6c049d6c7e7137e144"
+ name = "oneTBB",
+ branch = "v2021.8.0",
+ remote = "https://github.com/oneapi-src/oneTBB/",
+ patch_args = ["-p1"],
+ patches = ["mwaitpkg.patch",]
+ name = "mediapipe_calculators",
+ build_file = "@//third_party/mediapipe_calculators:BUILD",
+ path = "/ovms/third_party/mediapipe_calculators",
@@ -30,7 +30,10 @@ RUN cd $(bazel info output_base)/external/build_bazel_rules_apple/ && patch -p1
WORKDIR /ovms/
RUN /cov/bin/cov-configure --gcc --config coverity_config.xml && \
/cov/bin/cov-configure --comptype gcc --compiler /usr/bin/gcc && \
- /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc); \
+ /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc) && \
+ cd /ovms/src/custom_nodes/tokenizer && \
+ mkdir build && cd build && \
+ cmake .. && make --jobs=$(nproc) && \
cd /example_cpp_client/cpp && \
bazel build --spawn_strategy=standalone //src:all'
diff --git a/ci/bandit.sh b/ci/bandit.sh
new file mode 100755
index 0000000000..14a5196f64
--- /dev/null
+++ b/ci/bandit.sh
@@ -0,0 +1,37 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+function error_handle {
+function cleanup {
+ echo "Cleaning up bandit scan"
+trap error_handle ERR
+trap cleanup EXIT
+bandit -x demos/benchmark/python -r demos/*/python > ${BANDIT_RESULTS}
+if ! grep -FRq "No issues identified." ${BANDIT_RESULTS}; then
+ echo "Bandit scan failed for demos";
+ exit 1;
+bandit -r client/python/ > ${BANDIT_RESULTS}
+if ! grep -FRq "No issues identified." ${BANDIT_RESULTS}; then
+ echo "Bandit scan failed for client";
+ exit 2;
+exit 0
index 2c41c30be8..c3fdea87e7 100755
--- a/check_coverage.bat
+++ b/ci/check_coverage.bat
@@ -1,12 +1,12 @@
LINES_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | head -n 1`
FUNC_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | tail -n 1`
--- a/cppclean.sh
+++ b/ci/cppclean.sh
@@ -16,37 +16,55 @@
-cppclean ./src/ 2>&1 | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC};
-cppclean ./src/ 2>&1 | grep test > ${CPPCLEAN_RESULTS_FILE_TEST};
+cppclean ./src/ 2>&1 | grep -v "unable to find" | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC};
+cppclean ./src/ 2>&1 | grep -v "unable to find" | grep test > ${CPPCLEAN_RESULTS_FILE_TEST};
NO_WARNINGS=$(wc -l ${CPPCLEAN_RESULTS_FILE_SRC} | awk '{print $1}')
-NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}')
NO_WARNINGS_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l)
NO_WARNINGS_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l)
+NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}')
+NO_WARNINGS_TEST_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l)
+NO_WARNINGS_TEST_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l)
echo "Number of warnings:" ${NO_WARNINGS}
echo "Number of warnings in tests:" ${NO_WARNINGS_TEST}
echo "Number of warnings about not using forward delares:" ${NO_WARNINGS_FORWARD}
echo "Number of warnings about not direct includes:" ${NO_WARNINGS_DIRECT}
echo "Number of warnings about not used: " ${NO_WARNINGS_NOTUSED}
+echo "Number of warnings in tests about not using forward delares:" ${NO_WARNINGS_TEST_FORWARD}
+echo "Number of warnings in tests about not direct includes:" ${NO_WARNINGS_TEST_DIRECT}
+echo "Number of warnings in tests about not used: " ${NO_WARNINGS_TEST_NOTUSED}
trap "cat ${CPPCLEAN_RESULTS_FILE_SRC}" err exit
-if [ ${NO_WARNINGS_FORWARD} -gt 6 ]; then
+if [ ${NO_WARNINGS_FORWARD} -gt 7 ]; then
echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_FORWARD}";
exit 1;
-if [ ${NO_WARNINGS_DIRECT} -gt 14 ]; then
+if [ ${NO_WARNINGS_DIRECT} -gt 13 ]; then
echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_DIRECT}";
exit 1;
-if [ ${NO_WARNINGS_NOTUSED} -gt 3 ]; then
+if [ ${NO_WARNINGS_NOTUSED} -gt 4 ]; then
echo "Failed probably due to unnecessary forward includes: ${NO_WARNINGS_NOTUSED}";
exit 1;
-if [ ${NO_WARNINGS} -gt 196 ]; then
+if [ ${NO_WARNINGS_TEST_FORWARD} -gt 1 ]; then
+ echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_TEST_FORWARD}";
+ exit 1;
+if [ ${NO_WARNINGS_TEST_DIRECT} -gt 12 ]; then
+ echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_TEST_DIRECT}";
+ exit 1;
+if [ ${NO_WARNINGS_TEST_NOTUSED} -gt 0 ]; then
+ echo "Failed probably due to unnecessary forward includes: ${NO_WARNINGS_TEST_NOTUSED}";
+ exit 1;
+if [ ${NO_WARNINGS} -gt 159 ]; then
echo "Failed due to higher than allowed number of issues in code: ${NO_WARNINGS}"
exit 1
-if [ ${NO_WARNINGS_TEST} -gt 128 ]; then
+if [ ${NO_WARNINGS_TEST} -gt 55 ]; then
echo "Failed due to higher than allowed number of issues in test code: ${NO_WARNINGS_TEST}"
exit 1
index c6bc38ba8e..ab6bf6b24a 100644
--- a/lib_search.py
+++ b/ci/lib_search.py
@@ -90,6 +90,7 @@ def check_dir(start_dir):
+ 'passthrough.xml',
@@ -114,6 +115,11 @@ def check_dir(start_dir):
+ "mwaitpkg.patch",
+ 'saved_model.pb',
+ "yarn.lock",
+ "BUILD.bazel",
+ "package.json",
exclude_directories = ['/dist/', 'extras/ovms-operator', 'extras/openvino-operator-openshift', 'release_files/thirdparty-licenses']
@@ -169,6 +175,7 @@ def check_func(start_dir):
+ 'passthrough.xml',
@@ -189,6 +196,9 @@ def check_func(start_dir):
+ 'yarn.lock',
+ 'BUILD.bazel',
+ 'package.json',
exclude_directories = ['/dist/', 'extras/ovms-operator']
@@ -62,7 +62,7 @@ namespace tc = triton::client;
} \
-std::vector load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
std::streampos fileSize;
@@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
- std::vector vec;
- vec.reserve(fileSize);
- vec.insert(vec.begin(),
- std::istream_iterator(file),
- std::istream_iterator());
- return vec;
+ return oss.str();
int main(int argc, char** argv) {
cxxopts::Options opt("grpc_async_infer_resnet", "Sends requests via KServe gRPC API.");
@@ -151,14 +148,19 @@ int main(int argc, char** argv) {
std::vector shape{1};
- // Initialize the inputs with the data.
- tc::InferInput* input;
+ std::vector inputs;
+ std::vector> input_ptrs;
+ for (int i = 0; i < imgs.size(); i++) {
+ tc::InferInput* input = nullptr;  // assigned by InferInput::Create below; never copy it uninitialized
+ inputs.push_back(input);
- tc::InferInput::Create(&input, input_name, shape, "BYTES"),
- "unable to get input");
- std::shared_ptr input_ptr;
- input_ptr.reset(input);
+ tc::InferInput::Create(&input, input_name, shape, "BYTES"),
+ "unable to get input");
+ std::shared_ptr input_ptr;
+ input_ptr.reset(input);
+ input_ptrs.push_back(input_ptr);
+ }
tc::InferOptions options(model_name);
if (args.count("model_version"))
@@ -169,7 +171,6 @@ int main(int argc, char** argv) {
std::cerr << "The provided argument is of a wrong type" << std::endl;
return 1;
- std::vector inputs = {input_ptr.get()};
std::vector classes;
std::ifstream lb_f(args["labels_list"].as());
@@ -178,7 +179,7 @@ int main(int argc, char** argv) {
- std::vector> input_data;
+ std::vector input_data;
for (int i = 0; i < imgs.size(); i++) {
try {
@@ -196,8 +197,9 @@ int main(int argc, char** argv) {
int completedRequestCount = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < imgs.size(); i++) {
+ std::vector inputs = {input_ptrs[i].get()};
- input_ptr->AppendRaw(input_data[i]),
+ input_ptrs[i]->AppendFromString({input_data[i]}),
"unable to set data for input");
[&, i](tc::InferResult* result) -> int {
@@ -231,7 +233,6 @@ int main(int argc, char** argv) {
return 0;
options, inputs);
- input->Reset();
std::unique_lock lk(mtx);
@@ -59,7 +59,7 @@ namespace tc = triton::client;
} \
-std::vector load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
std::streampos fileSize;
@@ -67,14 +67,10 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
- std::vector vec;
- vec.reserve(fileSize);
- vec.insert(vec.begin(),
- std::istream_iterator(file),
- std::istream_iterator());
- return vec;
+ return oss.str();
int main(int argc, char** argv) {
@@ -171,7 +167,7 @@ int main(int argc, char** argv) {
std::vector results;
for (int i = 0; i < imgs.size(); i++) {
- std::vector input_data;
+ std::string input_data;
try {
input_data = load(imgs[i]);
@@ -180,7 +176,7 @@ int main(int argc, char** argv) {
return 1;
- input_ptr->AppendRaw(input_data),
+ input_ptr->AppendFromString({input_data}),
"unable to set data for input");
client->Infer(&(results[i]), options, inputs),
diff --git a/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp
} \
-std::vector load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
std::streampos fileSize;
@@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
- std::vector vec;
- vec.reserve(fileSize);
- vec.insert(vec.begin(),
- std::istream_iterator(file),
- std::istream_iterator());
- return vec;
+ return oss.str();
int main(int argc, char** argv) {
cxxopts::Options opt("http_async_infer_resnet", "Sends requests via KServe REST API.");
@@ -151,14 +148,19 @@ int main(int argc, char** argv) {
std::vector shape{1};
- // Initialize the inputs with the data.
- tc::InferInput* input;
+ std::vector inputs;
+ std::vector> input_ptrs;
+ for (int i = 0; i < imgs.size(); i++) {
+ tc::InferInput* input = nullptr;  // assigned by InferInput::Create below; never copy it uninitialized
+ inputs.push_back(input);
- tc::InferInput::Create(&input, input_name, shape, "BYTES"),
- "unable to get input");
- std::shared_ptr input_ptr;
- input_ptr.reset(input);
+ tc::InferInput::Create(&input, input_name, shape, "BYTES"),
+ "unable to get input");
+ std::shared_ptr input_ptr;
+ input_ptr.reset(input);
+ input_ptrs.push_back(input_ptr);
+ }
tc::InferOptions options(model_name);
if (args.count("model_version"))
@@ -169,7 +171,6 @@ int main(int argc, char** argv) {
std::cerr << "The provided argument is of a wrong type" << std::endl;
return 1;
- std::vector inputs = {input_ptr.get()};
std::vector classes;
std::ifstream lb_f(args["labels_list"].as());
@@ -187,7 +188,7 @@ int main(int argc, char** argv) {
std::vector outputs = {output_ptr.get()};
- std::vector> input_data;
+ std::vector input_data;
for (int i = 0; i < imgs.size(); i++) {
try {
@@ -204,8 +205,9 @@ int main(int argc, char** argv) {
int completedRequestCount = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < imgs.size(); i++) {
+ std::vector inputs = {input_ptrs[i].get()};
- input_ptr->AppendRaw(input_data[i]),
+ input_ptrs[i]->AppendFromString({input_data[i]}),
"unable to set data for input");
[&, i](tc::InferResult* result) -> int {
@@ -239,7 +241,6 @@ int main(int argc, char** argv) {
return 0;
options, inputs, outputs);
- input->Reset();
std::unique_lock lk(mtx);
diff --git a/client/cpp/kserve-api/samples/http_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_infer_resnet.cpp
} \
-std::vector load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
std::streampos fileSize;
@@ -67,16 +67,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
- std::vector vec;
- vec.reserve(fileSize);
- vec.insert(vec.begin(),
- std::istream_iterator(file),
- std::istream_iterator());
- return vec;
+ return oss.str();
int main(int argc, char** argv) {
cxxopts::Options opt("http_infer_resnet", "Sends requests via KServe REST API.");
@@ -181,7 +178,7 @@ int main(int argc, char** argv) {
std::vector results;
for (int i = 0; i < imgs.size(); i++) {
- std::vector input_data;
+ std::string input_data;
try {
input_data = load(imgs[i]);
@@ -190,7 +187,7 @@ int main(int argc, char** argv) {
return 1;
- input_ptr->AppendRaw(input_data),
+ input_ptr->AppendFromString({input_data}),
"unable to set data for input");
client->Infer(&(results[i]), options, inputs, outputs),
diff --git a/client/go/kserve-api/grpc_infer_resnet.go b/client/go/kserve-api/grpc_infer_resnet.go
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
+ bytes, err := ioutil.ReadFile(fileName)
+ contents := grpc_client.InferTensorContents{}
+ contents.BytesContents = append(contents.BytesContents, bytes)
+ inferInput := grpc_client.ModelInferRequest_InferInputTensor{
+ Name: "0",
+ Datatype: "BYTES",
+ Shape: []int64{1},
+ Contents: &contents,
+ }
// Create request input tensors
inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
- &grpc_client.ModelInferRequest_InferInputTensor{
- Name: "0",
- Datatype: "BYTES",
- Shape: []int64{1},
- },
+ &inferInput,
// Create inference request for specific model/version
@@ -100,11 +107,6 @@ func ModelInferRequest(client grpc_client.GRPCInferenceServiceClient, fileName s
Inputs: inferInputs,
- bytes, err := ioutil.ReadFile(fileName)
- modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes)
// Submit inference request to server
modelInferResponse, err := client.ModelInfer(ctx, &modelInferRequest)
if err != nil {
diff --git a/client/java/kserve-api/README.md b/client/java/kserve-api/README.md
- Usage Example
-java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./common/resnet_input_images.txt -lbs ../../../common/resnet_labels.txt --grpc_port 9000
+java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./resnet_input_images.txt -lbs ../../common/resnet_labels.txt --grpc_port 9000
../../../demos/common/static/images/airliner.jpeg classified as 404 airliner
../../../demos/common/static/images/arctic-fox.jpeg classified as 279 Arctic fox, white fox, Alopex lagopus
../../../demos/common/static/images/bee.jpeg classified as 309 bee
diff --git a/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java b/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java
import com.google.protobuf.ByteString;
+import inference.GrpcPredictV2.InferTensorContents;
import inference.GRPCInferenceServiceGrpc;
import inference.GRPCInferenceServiceGrpc.GRPCInferenceServiceBlockingStub;
import inference.GrpcPredictV2.ModelInferRequest;
@@ -102,8 +103,6 @@ public static void main(String[] args) {
- request.addInputs(0, input);
List labels = new ArrayList<>();
try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("lbs"))) {
try (Scanner sc = new Scanner(fis)) {
@@ -118,13 +117,15 @@ public static void main(String[] args) {
try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("imgs"))) {
try (Scanner sc = new Scanner(fis)) {
while (sc.hasNext()) {
+ request.clearInputs();
String[] line = sc.nextLine().split(" ");
String fileName = line[0];
int label = Integer.parseInt(line[1]);
FileInputStream imageStream = new FileInputStream(fileName);
- request.clearRawInputContents();
- request.addRawInputContents(ByteString.readFrom(imageStream));
+ InferTensorContents.Builder input_data = InferTensorContents.newBuilder();
+ input_data.addBytesContents(ByteString.readFrom(imageStream));
+ input.setContents(input_data);
+ request.addInputs(0, input);
ModelInferResponse response = grpc_stub.modelInfer(request.build());
diff --git a/client/python/kserve-api/samples/README.md b/client/python/kserve-api/samples/README.md
python3 grpc_infer_binary_resnet.py --help
-usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--grpc_address GRPC_ADDRESS]
- [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
- [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME]
+usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--grpc_address GRPC_ADDRESS] [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
+ [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls]
Sends requests via KServe gRPC API using images in format supported by OpenCV. It displays performance statistics and optionally the model accuracy
@@ -339,8 +338,6 @@ optional arguments:
-h, --help show this help message and exit
--images_list IMAGES_LIST
path to a file with a list of labeled images
- --labels_numpy_path LABELS_NUMPY_PATH
- numpy in shape [n,1] - can be used to check model accuracy
--grpc_address GRPC_ADDRESS
Specify url to grpc service. default:localhost
--grpc_port GRPC_PORT
@@ -355,12 +352,13 @@ optional arguments:
Define model name, must be same as is in service. default: resnet
--pipeline_name PIPELINE_NAME
Define pipeline name, must be same as is in service
+ --tls use TLS communication with GRPC endpoint
- Usage Example
-python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet
+python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet
Start processing:
Model name: resnet
Iteration 0; Processing time: 27.09 ms; speed 36.92 fps
@@ -794,9 +792,8 @@ Classification accuracy: 100.00
python3 ./http_infer_binary_resnet.py --help
-usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--http_address HTTP_ADDRESS]
- [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
- [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME]
+usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--http_address HTTP_ADDRESS] [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
+ [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls] [--server_cert SERVER_CERT] [--client_cert CLIENT_CERT] [--client_key CLIENT_KEY]
Sends requests via KServe REST API using binary encoded images. It displays performance statistics and optionally the model accuracy
@@ -804,8 +801,6 @@ optional arguments:
-h, --help show this help message and exit
--images_list IMAGES_LIST
path to a file with a list of labeled images
- --labels_numpy_path LABELS_NUMPY_PATH
- numpy in shape [n,1] - can be used to check model accuracy
--http_address HTTP_ADDRESS
Specify url to http service. default:localhost
--http_port HTTP_PORT
@@ -820,12 +815,19 @@ optional arguments:
Define model name, must be same as is in service. default: resnet
--pipeline_name PIPELINE_NAME
Define pipeline name, must be same as is in service
+ --tls use TLS communication with HTTP endpoint
+ --server_cert SERVER_CERT
+ Path to server certificate
+ --client_cert CLIENT_CERT
+ Path to client certificate
+ --client_key CLIENT_KEY
+ Path to client key
- Usage Example
-python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet
+python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet
Start processing:
Model name: resnet
Iteration 0; Processing time: 38.61 ms; speed 25.90 fps
diff --git a/client/python/kserve-api/samples/grpc_infer_binary_resnet.py b/client/python/kserve-api/samples/grpc_infer_binary_resnet.py
import sys
-import ast
-import grpc
import numpy as np
import classes
import datetime
import argparse
from client_utils import print_statistics
-from tritonclient.grpc import service_pb2, service_pb2_grpc
-from tritonclient.utils import *
-DataTypeToContentsFieldName = {
- 'BOOL' : 'bool_contents',
- 'BYTES' : 'bytes_contents',
- 'FP32' : 'fp32_contents',
- 'FP64' : 'fp64_contents',
- 'INT64' : 'int64_contents',
- 'INT32' : 'int_contents',
- 'UINT64' : 'uint64_contents',
- 'UINT32' : 'uint_contents',
- 'INT64' : 'int64_contents',
- 'INT32' : 'int_contents',
-def as_numpy(response, name):
- index = 0
- for output in response.outputs:
- if output.name == name:
- shape = []
- for value in output.shape:
- shape.append(value)
- datatype = output.datatype
- field_name = DataTypeToContentsFieldName[datatype]
- contents = getattr(output, "contents")
- contents = getattr(contents, f"{field_name}")
- if index < len(response.raw_output_contents):
- np_array = np.frombuffer(
- response.raw_output_contents[index], dtype=triton_to_np_dtype(output.datatype))
- elif len(contents) != 0:
- np_array = np.array(contents,
- copy=False)
- else:
- np_array = np.empty(0)
- np_array = np_array.reshape(shape)
- return np_array
- else:
- index += 1
- return None
+import tritonclient.grpc as grpclient
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Sends requests via KServe gRPC API using images in format supported by OpenCV. '
'It displays performance statistics and optionally the model accuracy')
parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images')
- parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy')
parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. default: input')
@@ -82,10 +42,14 @@ def as_numpy(response, name):
parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service',
+ parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with GRPC endpoint')
+ error = False
args = vars(parser.parse_args())
address = "{}:{}".format(args['grpc_address'],args['grpc_port'])
+ input_name = args['input_name']
+ output_name = args['output_name']
processing_times = np.zeros((0),int)
@@ -96,10 +60,6 @@ def as_numpy(response, name):
while batch_size > len(lines):
lines += lines
- if args.get('labels_numpy_path') is not None:
- lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False)
- matched_count = 0
- total_executed = 0
batch_size = int(args.get('batchsize'))
print('Start processing:')
@@ -108,76 +68,79 @@ def as_numpy(response, name):
iteration = 0
is_pipeline_request = bool(args.get('pipeline_name'))
- # Create gRPC stub for communicating with the server
- channel = grpc.insecure_channel(address)
- grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel)
+ model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
+ try:
+ triton_client = grpclient.InferenceServerClient(
+ url=address,
+ ssl=args['tls'],
+ verbose=False)
+ except Exception as e:
+ print("context creation failed: " + str(e))
+ sys.exit(1)  # non-zero status: client creation failed
+ processing_times = np.zeros((0),int)
+ total_executed = 0
+ matched_count = 0
batch_i = 0
image_data = []
+ image_binary_size = []
labels = []
for line in lines:
+ inputs = []
batch_i += 1
path, label = line.strip().split(" ")
with open(path, 'rb') as f:
- labels.append(label)
+ labels.append(int(label))
if batch_i < batch_size:
- inputs = []
- inputs.append(service_pb2.ModelInferRequest().InferInputTensor())
- inputs[0].name = args['input_name']
- inputs[0].datatype = "BYTES"
- inputs[0].shape.extend([1])
- inputs[0].contents.bytes_contents.append(image_data[0])
+ inputs.append(grpclient.InferInput(args['input_name'], [batch_i], "BYTES"))
outputs = []
- outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor())
- outputs[0].name = "prob"
- request = service_pb2.ModelInferRequest()
- request.model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
- request.inputs.extend(inputs)
+ outputs.append(grpclient.InferRequestedOutput(output_name))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
start_time = datetime.datetime.now()
- request.outputs.extend(outputs)
- response = grpc_stub.ModelInfer(request)
+ results = triton_client.infer(model_name=model_name,
+ inputs=inputs,
+ outputs=outputs)
end_time = datetime.datetime.now()
duration = (end_time - start_time).total_seconds() * 1000
processing_times = np.append(processing_times,np.array([int(duration)]))
- output = as_numpy(response, args['output_name'])
+ output = results.as_numpy(output_name)
nu = np.array(output)
# for object classification models show imagenet class
print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2),
- round(1000 * batch_size / np.average(duration), 2)
- ))
+ round(1000 * batch_size / np.average(duration), 2)
+ ))
# Comment out this section for non imagenet datasets
print("imagenet top results in a single batch:")
for i in range(nu.shape[0]):
- lbs_i = iteration * batch_size
- single_result = nu[[i],...]
- offset = 0
- if nu.shape[1] == 1001:
- offset = 1
- ma = np.argmax(single_result) - offset
+ if is_pipeline_request:
+ # shape (1,)
+ print("response shape", output.shape)
+ ma = nu[0] - 1 # indices need to be shifted left due to 1x1001 shape
+ else:
+ # shape (1,1000)
+ single_result = nu[[i],...]
+ offset = 0
+ if nu.shape[1] == 1001:
+ offset = 1
+ ma = np.argmax(single_result) - offset
mark_message = ""
- if args.get('labels_numpy_path') is not None:
- total_executed += 1
- if ma == lbs[lbs_i + i]:
- matched_count += 1
- mark_message = "; Correct match."
- else:
- mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] )
- print("\t",i, classes.imagenet_classes[ma],ma, mark_message)
+ total_executed += 1
+ if ma == labels[i]:
+ matched_count += 1
+ mark_message = "; Correct match."
+ else:
+ mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]])  # expected class id and its name
+ print("\t", i, classes.imagenet_classes[ma], ma, mark_message)
# Comment out this section for non imagenet datasets
- iteration += 1
- image_data = []
labels = []
+ image_data = []
batch_i = 0
print_statistics(processing_times, batch_size)
- if args.get('labels_numpy_path') is not None:
- print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
+ print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
diff --git a/client/python/kserve-api/samples/http_infer_binary_resnet.py b/client/python/kserve-api/samples/http_infer_binary_resnet.py
import datetime
import argparse
from client_utils import print_statistics
-import requests
-import json
+import tritonclient.http as httpclient
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Sends requests via KServe REST API using binary encoded images. '
'It displays performance statistics and optionally the model accuracy')
parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images')
- parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy')
parser.add_argument('--http_address',required=False, default='localhost', help='Specify url to http service. default:localhost')
parser.add_argument('--http_port',required=False, default=8000, help='Specify port to http service. default: 8000')
parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. default: input')
@@ -43,6 +42,10 @@
parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service',
+ parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with HTTP endpoint')
+ parser.add_argument('--server_cert', required=False, help='Path to server certificate', default=None)
+ parser.add_argument('--client_cert', required=False, help='Path to client certificate', default=None)
+ parser.add_argument('--client_key', required=False, help='Path to client key', default=None)
error = False
args = vars(parser.parse_args())
@@ -51,6 +54,15 @@
input_name = args['input_name']
output_name = args['output_name']
+ if args['tls']:
+ ssl_options = {
+ 'keyfile':args['client_key'],
+ 'cert_file':args['client_cert'],
+ 'ca_certs':args['server_cert']
+ }
+ else:
+ ssl_options = None
processing_times = np.zeros((0),int)
input_images = args.get('images_list')
@@ -60,10 +72,6 @@
while batch_size > len(lines):
lines += lines
- if args.get('labels_numpy_path') is not None:
- lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False)
- matched_count = 0
- total_executed = 0
batch_size = int(args.get('batchsize'))
print('Start processing:')
@@ -74,72 +82,79 @@
model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
- url = f"http://{address}/v2/models/{model_name}/infer"
- http_session = requests.session()
+ try:
+ triton_client = httpclient.InferenceServerClient(
+ url=address,
+ ssl=args['tls'],
+ ssl_options=ssl_options,
+ verbose=False)
+ except Exception as e:
+ print("context creation failed: " + str(e))
+ sys.exit(1)  # non-zero status: client creation failed
+ processing_times = np.zeros((0),int)
+ total_executed = 0
+ matched_count = 0
batch_i = 0
image_data = []
image_binary_size = []
labels = []
for line in lines:
+ inputs = []
batch_i += 1
path, label = line.strip().split(" ")
with open(path, 'rb') as f:
- image_binary_size.append(len(image_data[-1]))
- labels.append(label)
+ labels.append(int(label))
if batch_i < batch_size:
- image_binary_size_str = ",".join(map(str, image_binary_size))
- inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]}
- inference_header_binary = json.dumps(inference_header).encode()
+ inputs.append(httpclient.InferInput(args['input_name'], [batch_i], "BYTES"))
+ outputs = []
+ outputs.append(httpclient.InferRequestedOutput(output_name, binary_data=True))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
start_time = datetime.datetime.now()
- results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))})
+ results = triton_client.infer(model_name=model_name,
+ inputs=inputs,
+ outputs=outputs)
end_time = datetime.datetime.now()
duration = (end_time - start_time).total_seconds() * 1000
processing_times = np.append(processing_times,np.array([int(duration)]))
- results_dict = json.loads(results.text)
- if "error" in results_dict.keys():
- print(f"Error: {results_dict['error']}")
- error = True
- break
- output = np.array(json.loads(results.text)['outputs'][0]['data'])
- output_shape = tuple(results_dict['outputs'][0]['shape'])
- nu = np.reshape(output, output_shape)
+ output = results.as_numpy(output_name)
+ nu = np.array(output)
# for object classification models show imagenet class
print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2),
- round(1000 * batch_size / np.average(duration), 2)
- ))
+ round(1000 * batch_size / np.average(duration), 2)
+ ))
# Comment out this section for non imagenet datasets
print("imagenet top results in a single batch:")
for i in range(nu.shape[0]):
- lbs_i = iteration * batch_size
- single_result = nu[[i],...]
- offset = 0
- if nu.shape[1] == 1001:
- offset = 1
- ma = np.argmax(single_result) - offset
+ if is_pipeline_request:
+ # shape (1,)
+ print("response shape", output.shape)
+ ma = nu[0] - 1 # indices need to be shifted left due to 1x1001 shape
+ else:
+ # shape (1,1000)
+ single_result = nu[[i],...]
+ offset = 0
+ if nu.shape[1] == 1001:
+ offset = 1
+ ma = np.argmax(single_result) - offset
mark_message = ""
- if args.get('labels_numpy_path') is not None:
- total_executed += 1
- if ma == lbs[lbs_i + i]:
- matched_count += 1
- mark_message = "; Correct match."
- else:
- mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] )
- print("\t",i, classes.imagenet_classes[ma],ma, mark_message)
+ total_executed += 1
+ if ma == labels[i]:
+ matched_count += 1
+ mark_message = "; Correct match."
+ else:
+ mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]])  # expected class id and its name
+ print("\t", i, classes.imagenet_classes[ma], ma, mark_message)
# Comment out this section for non imagenet datasets
- iteration += 1
- image_data = []
- image_binary_size = []
labels = []
+ image_data = []
batch_i = 0
- if not error:
- print_statistics(processing_times, batch_size)
- if args.get('labels_numpy_path') is not None:
- print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
+ print_statistics(processing_times, batch_size)
+ print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
\ No newline at end of file
@@ -1,4 +1,4 @@
diff --git a/client/python/ovmsclient/lib/Makefile b/client/python/ovmsclient/lib/Makefile
VIRTUALENV_DIR := .venv-ovmsclient
-PACKAGE_PATH := dist/ovmsclient-2022.3-py3-none-any.whl
+PACKAGE_PATH := dist/ovmsclient-2023.0-py3-none-any.whl
.PHONY: build-deps build-package build test clean style
diff --git a/client/python/ovmsclient/lib/README.md b/client/python/ovmsclient/lib/README.md
**To install the package run:**
-pip3 install --force-reinstall --no-deps dist/ovmsclient-2022.3-py3-none-any.whl
+pip3 install --force-reinstall --no-deps dist/ovmsclient-2023.0-py3-none-any.whl
*Note*: For development purposes you may want to repeatedly reinstall the package.
@@ -61,7 +61,7 @@ make build-package
- `make test` - runs tests on `ovmsclient` package. By default the package located in `dist/` directory is used. To specify custom package path pass `PACKAGE_PATH` option like:
- `make test PACKAGE_PATH=/opt/packages/ovmsclient-2022.3-py3-none-any.whl`
+ `make test PACKAGE_PATH=/opt/packages/ovmsclient-2023.0-py3-none-any.whl`
make test
diff --git a/client/python/ovmsclient/lib/docs/grpc_client.md b/client/python/ovmsclient/lib/docs/grpc_client.md
- Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`
+ Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as an array of `numpy.bytes_` dtype.
@@ -202,12 +202,21 @@ Request prediction on provided inputs.
import ovmsclient
client = ovmsclient.make_grpc_client("localhost:9000")
+# Numeric input
inputs = {"input": [1, 2, 3]}
# request prediction on specific model version, with timeout set to 2.5 seconds
results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5)
# request prediction on the latest model version
results = client.predict(inputs=inputs, model_name="model")
+# String input
+inputs = {"input": ["We have a really nice", "One, two, three,"]}
+results = client.predict(inputs=inputs, model_name="model")
+# ['We have a really nice', 'One, two, three,']
+# [b'We have a really nice way' b'One, two, three, four']
diff --git a/client/python/ovmsclient/lib/docs/http_client.md b/client/python/ovmsclient/lib/docs/http_client.md
- Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`
+ Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as an array of `numpy.bytes_` dtype.
@@ -200,12 +200,21 @@ Request prediction on provided inputs.
import ovmsclient
client = ovmsclient.make_http_client("localhost:9000")
+# Numeric input
inputs = {"input": [1, 2, 3]}
# request prediction on specific model version, with timeout set to 2.5 seconds
results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5)
# request prediction on the latest model version
results = client.predict(inputs=inputs, model_name="model")
+# String input
+inputs = {"input": ["We have a really nice", "One, two, three,"]}
+results = client.predict(inputs=inputs, model_name="model")
+# ['We have a really nice', 'One, two, three,']
+# [b'We have a really nice way' b'One, two, three, four']
diff --git a/client/python/ovmsclient/lib/docs/pypi_overview.md b/client/python/ovmsclient/lib/docs/pypi_overview.md
The `ovmsclient` can replace `tensorflow-serving-api` package with reduced footprint and simplified interface.
-See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2022/3/client/python/ovmsclient/lib/docs/README.md) for usage details.
+See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/client/python/ovmsclient/lib/docs/README.md) for usage details.
## Usage example
@@ -38,4 +38,4 @@ results = client.predict(inputs=inputs, model_name="model")
-Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2022/3/client/python/ovmsclient/lib).
\ No newline at end of file
+Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2023/0/client/python/ovmsclient/lib).
\ No newline at end of file
diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py
class TensorType(NamedTuple):
+ NumpyPrimaryType: np.dtype.type
TensorDtype: str
TensorProtoField: str
- np.float16: TensorType(TensorDtype=DataType.DT_HALF, TensorProtoField="half_val"),
- np.float32: TensorType(TensorDtype=DataType.DT_FLOAT, TensorProtoField="float_val"),
- np.float64: TensorType(TensorDtype=DataType.DT_DOUBLE, TensorProtoField="double_val"),
- np.int8: TensorType(TensorDtype=DataType.DT_INT8, TensorProtoField="int_val"),
- np.int16: TensorType(TensorDtype=DataType.DT_INT16, TensorProtoField="int_val"),
- np.int32: TensorType(TensorDtype=DataType.DT_INT32, TensorProtoField="int_val"),
- np.int64: TensorType(TensorDtype=DataType.DT_INT64, TensorProtoField="int64_val"),
- np.uint8: TensorType(TensorDtype=DataType.DT_UINT8, TensorProtoField="int_val"),
- np.uint16: TensorType(TensorDtype=DataType.DT_UINT16, TensorProtoField="int_val"),
- np.uint32: TensorType(TensorDtype=DataType.DT_UINT32, TensorProtoField="uint32_val"),
- np.uint64: TensorType(TensorDtype=DataType.DT_UINT64, TensorProtoField="uint64_val"),
- np.complex64: TensorType(TensorDtype=DataType.DT_COMPLEX64, TensorProtoField="scomplex_val"),
- np.complex128: TensorType(TensorDtype=DataType.DT_COMPLEX128, TensorProtoField="dcomplex_val"),
+ np.float16: TensorType(NumpyPrimaryType=np.float16, TensorDtype=DataType.DT_HALF,
+ TensorProtoField="half_val"),
+ np.float32: TensorType(NumpyPrimaryType=np.float32, TensorDtype=DataType.DT_FLOAT,
+ TensorProtoField="float_val"),
+ np.float64: TensorType(NumpyPrimaryType=np.float64, TensorDtype=DataType.DT_DOUBLE,
+ TensorProtoField="double_val"),
+ np.int8: TensorType(NumpyPrimaryType=np.int8, TensorDtype=DataType.DT_INT8,
+ TensorProtoField="int_val"),
+ np.int16: TensorType(NumpyPrimaryType=np.int16, TensorDtype=DataType.DT_INT16,
+ TensorProtoField="int_val"),
+ np.int32: TensorType(NumpyPrimaryType=np.int32, TensorDtype=DataType.DT_INT32,
+ TensorProtoField="int_val"),
+ np.int64: TensorType(NumpyPrimaryType=np.int64, TensorDtype=DataType.DT_INT64,
+ TensorProtoField="int64_val"),
+ np.uint8: TensorType(NumpyPrimaryType=np.uint8, TensorDtype=DataType.DT_UINT8,
+ TensorProtoField="int_val"),
+ np.uint16: TensorType(NumpyPrimaryType=np.uint16, TensorDtype=DataType.DT_UINT16,
+ TensorProtoField="int_val"),
+ np.uint32: TensorType(NumpyPrimaryType=np.uint32, TensorDtype=DataType.DT_UINT32,
+ TensorProtoField="uint32_val"),
+ np.uint64: TensorType(NumpyPrimaryType=np.uint64, TensorDtype=DataType.DT_UINT64,
+ TensorProtoField="uint64_val"),
+ np.complex64: TensorType(NumpyPrimaryType=np.complex64, TensorDtype=DataType.DT_COMPLEX64,
+ TensorProtoField="scomplex_val"),
+ np.complex128: TensorType(NumpyPrimaryType=np.complex128, TensorDtype=DataType.DT_COMPLEX128,
+ TensorProtoField="dcomplex_val"),
# Standard Python bool and np.bool_ replace deprecated np.bool type
- np.bool_: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"),
- bool: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"),
- np.bytes_: TensorType(TensorDtype=DataType.DT_STRING, TensorProtoField="string_val")
+ bool: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL,
+ TensorProtoField="bool_val"),
+ np.bool_: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL,
+ TensorProtoField="bool_val"),
+ np.str_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING,
+ TensorProtoField="string_val"),
+ np.bytes_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING,
+ TensorProtoField="string_val"),
-TENSOR_TO_NP_MAP = {v.TensorDtype: k for k, v in NP_TO_TENSOR_MAP.items()}
+TENSOR_TO_NP_MAP = {v.TensorDtype: v.NumpyPrimaryType for v in NP_TO_TENSOR_MAP.values()}
TENSOR_DTYPE_TO_PROTOFIELD = {v.TensorDtype: v.TensorProtoField for v in NP_TO_TENSOR_MAP.values()}
@@ -79,8 +98,12 @@ def _cast_bytes_to_dtype(values, dtype):
raise ValueError(f'could not cast bytes to {dtype}. {e_info}')
-def _is_bytes_shape_valid(inferred_shape, tensor_values):
- return (len(inferred_shape) > 1 or (len(tensor_values.shape) > 1 and inferred_shape == []))
+# Function used with numpy.vectorize()
+def _encode_string_to_bytes(tensor_value, encoding="UTF-8"):
+ return tensor_value.encode(encoding)
+_encode_strings_to_bytes = np.vectorize(_encode_string_to_bytes)
def _check_if_array_homogeneous(tensor_values):
@@ -185,13 +208,18 @@ def make_tensor_proto(values, dtype=None, shape=None):
tensor_values = _cast_ndarray_to_dtype(tensor_values, np_dtype)
- if dtype == DataType.DT_STRING and _is_bytes_shape_valid(inferred_shape, tensor_values):
- raise ValueError("bytes values with dtype DT_STRING must be in shape [N]")
- elif inferred_shape == []:
+ if inferred_shape == []:
inferred_shape = list(tensor_values.shape)
elif inferred_shape != list(tensor_values.shape):
tensor_values = tensor_values.reshape(inferred_shape)
+ # For strings or binary image inputs flatten array to 1-D
+ if dtype == DataType.DT_STRING:
+ tensor_values = np.ravel(tensor_values)
+ # Encode strings
+ if tensor_values.dtype.type == np.str_:
+ tensor_values = _encode_strings_to_bytes(tensor_values)
dims = []
for d in inferred_shape:
@@ -274,7 +302,7 @@ def make_ndarray(tensor_proto):
elif np_dtype == np.complex128:
it = iter(tensor_proto.dcomplex_val)
values = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=np_dtype)
- elif np_dtype == np.bool_ or np_dtype == bool:
+ elif np_dtype == np.bool_:
values = np.fromiter(tensor_proto.bool_val, dtype=np_dtype)
raise TypeError("Unsupported tensor type: %s" % tensor_proto.dtype)
diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py
from ovmsclient.tfs_compat.base.requests import (PredictRequest, ModelMetadataRequest,
ModelStatusRequest, _check_model_spec)
from ovmsclient.tfs_compat.grpc.tensors import (NP_TO_TENSOR_MAP, DataType,
- _is_bytes_shape_valid,
@@ -167,6 +166,14 @@ def make_status_request(model_name, model_version=0):
return HttpModelStatusRequest(model_name, model_version)
+# Function used with numpy.vectorize()
+def _decode_bytes_to_string(tensor_value, encoding="UTF-8"):
+ return tensor_value.decode(encoding)
+_decode_bytes_to_strings = np.vectorize(_decode_bytes_to_string)
def _parse_input_data(values):
# create numpy ndarray from values and find its dtype if not provided
@@ -187,14 +194,14 @@ def _parse_input_data(values):
raise TypeError("provided values type is not valid")
- if dtype == DataType.DT_STRING and _is_bytes_shape_valid(tensor_values.shape, tensor_values):
- raise ValueError("bytes values with dtype DT_STRING must be in shape [N]")
- if dtype == DataType.DT_STRING:
- b64_values = []
- for value in tensor_values:
- b64_value = base64.b64encode(value).decode('utf-8')
- b64_values.append({"b64": b64_value})
- return b64_values
- else:
- return tensor_values.tolist()
+ if dtype == DataType.DT_STRING and tensor_values.dtype.type == np.bytes_:
+ if len(tensor_values.shape) != 1:
+ # Bytes in multidimensional shape will be encoded in UTF-8 as regular input
+ tensor_values = _decode_bytes_to_strings(tensor_values)
+ else:
+ b64_values = []
+ for value in tensor_values:
+ b64_value = base64.b64encode(value).decode('utf-8')
+ b64_values.append({"b64": b64_value})
+ return b64_values
+ return tensor_values.tolist()
diff --git a/client/python/ovmsclient/lib/scripts/build_tfs_api.sh b/client/python/ovmsclient/lib/scripts/build_tfs_api.sh
cp -R tf/tensorflow ovmsclient/tfs_compat/protos/tensorflow
cp -R tfs/tensorflow_serving ovmsclient/tfs_compat/protos/tensorflow_serving
find ovmsclient/tfs_compat/protos -name "*.proto" -exec sed -i -E 's/import "tensorflow/import "ovmsclient\/tfs_compat\/protos\/tensorflow/g' {} \;
+python3 scripts/rename_proto_package.py
protoc --proto_path=$PWD --python_out=$PWD/compiled_protos \
$PWD/ovmsclient/tfs_compat/protos/tensorflow/core/framework/*.proto \
diff --git a/client/python/ovmsclient/lib/scripts/rename_proto_package.py b/client/python/ovmsclient/lib/scripts/rename_proto_package.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+base_path = "ovmsclient/tfs_compat/protos"
+# Find all proto files
+proto_paths = []
+for root, subdirs, files in os.walk(base_path):
+ for file in files:
+ if file.endswith(".proto"):
+ file_path = os.path.join(root, file)
+ proto_paths.append(file_path)
+# Replace package name if defined in all proto files
+replacement_map = {
+ "package tensorflow": "package ovmsclient",
+ " tensorflow.": " ovmsclient.",
+ " .tensorflow.": " .ovmsclient."
+for proto_path in proto_paths:
+ with open(proto_path, 'rt') as file :
+ filedata = file.read()
+ for to_replace, replace_with in replacement_map.items():
+ filedata = filedata.replace(to_replace, replace_with)
+ with open(proto_path, 'wt') as file:
+ file.write(filedata)
diff --git a/client/python/ovmsclient/lib/setup.py b/client/python/ovmsclient/lib/setup.py
- version="2022.3",
+ version="2023.0",
license="Apache License 2.0",
author="Intel Corporation",
diff --git a/client/python/ovmsclient/lib/tests/config.py b/client/python/ovmsclient/lib/tests/config.py
from enum import IntEnum
+import numpy as np
class CallCount(IntEnum):
@@ -337,7 +338,7 @@ class CallCount(IntEnum):
TypeError, "inputs keys type should be str, but found int"
- [{"input": [1, 2, "three"]}, "model_name", 1, 1],
+ [{"input": np.array(b'1', dtype=np.dtype(np.void, 10))}, "model_name", 1, 1],
TypeError, "provided values type is not valid"
diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py
"input1": (1, 2, 3)
}, 'model_name', 0, TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
- ({
- "input1": [
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ]
- }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
# (inputs_dict,
@@ -222,7 +214,9 @@
tensor_content=array([1, 2, 3, 4, 5, 6]).tobytes()),
"input2": 5.0,
- "input3": bytes([1, 2, 3])
+ "input3": bytes([1, 2, 3]),
+ "input4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]],
+ "input5": [["list", "of", "strings"]],
}, {
"input2": {
"field": "float_val",
@@ -235,7 +229,21 @@
"shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]),
"dtype": DataType.DT_STRING,
'value': [bytes([1, 2, 3])]
- }
+ },
+ "input4": {
+ "field": "string_val",
+ "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=2),
+ TensorShapeProto.Dim(size=2)]),
+ "dtype": DataType.DT_STRING,
+ 'value': [bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3])]
+ },
+ "input5": {
+ "field": "string_val",
+ "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1),
+ TensorShapeProto.Dim(size=3)]),
+ "dtype": DataType.DT_STRING,
+ 'value': [b'list', b'of', b'strings']
+ },
}, 'model_name', 0),
@@ -274,9 +282,20 @@
"2": TensorProto(dtype=DataType.DT_STRING,
string_val=[bytes([1, 2, 3])]),
+ "3": TensorProto(dtype=DataType.DT_STRING,
+ tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=1),
+ TensorShapeProto.Dim(size=3)]),
+ string_val=[b'list', b'of', b'strings']),
+ "4": TensorProto(dtype=DataType.DT_STRING,
+ tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=2),
+ TensorShapeProto.Dim(size=2)]),
+ string_val=[bytes([1, 2, 3]), bytes([1, 2, 3]),
+ bytes([1, 2, 3]), bytes([1, 2, 3])]),
}, "model_name", 0, {
"1463": [bytes([1, 2, 3]), bytes([4, 5])],
- "2": [bytes([1, 2, 3])]
+ "2": [bytes([1, 2, 3])],
+ "3": [[b'list', b'of', b'strings']],
+ "4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]]
@@ -348,12 +348,22 @@ def test_make_tensor_proto_valid_scalar(params, expected_shape, expected_dtype,
TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]), DataType.DT_STRING,
+ ({"values": [[bytes([0x13, 0x00, 0x00, 0x00, 0x08]), bytes([0x13, 0x00, 0x00, 0x00, 0x08])]],
+ "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=2)]),
+ DataType.DT_STRING,
+ "string_val"
+ ),
-def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype, expected_field):
+def test_make_tensor_proto_valid_binary(params, expected_shape, expected_dtype, expected_field):
tensor_proto = make_tensor_proto(**params)
if expected_field == "string_val":
- assert tensor_proto.__getattribute__(expected_field) == [params["values"]]
+ if type(params["values"]) is not list:
+ assert tensor_proto.__getattribute__(expected_field) == [params["values"]]
+ else:
+ assert (tensor_proto.__getattribute__(expected_field)
+ == np.ravel(params["values"]).tolist())
assert (tensor_proto.__getattribute__(expected_field)
== np.frombuffer(params["values"],
@@ -362,6 +372,31 @@ def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype,
assert tensor_proto.tensor_shape == expected_shape
+@pytest.mark.parametrize("params, expected_shape", [
+ ({"values": "string", "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)])
+ ),
+ ({"values": ["list", "of", "strings"], "shape": [3], "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)])
+ ),
+ ({"values": [["nested", "list", "of", "strings"]]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=4)])
+ ),
+ # Upon numpy array creation it will be cast to numpy.str_ data type
+ ({"values": [1, 2, "three"]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)])
+ ),
+ ({"values": [[1, 2], [3, "four"]]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=2), TensorShapeProto.Dim(size=2)])
+ ),
+def test_make_tensor_proto_valid_string(params, expected_shape):
+ tensor_proto = make_tensor_proto(**params)
+ assert tensor_proto.string_val == np.ravel(params["values"]).astype(np.bytes_).tolist()
+ assert tensor_proto.dtype == DataType.DT_STRING
+ assert tensor_proto.tensor_shape == expected_shape
def test_make_tensor_proto_valid_string_to_float_dtype():
values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
tensor_proto = make_tensor_proto(values=values, shape=[3], dtype=DataType.DT_INT16)
@@ -625,40 +660,6 @@ def test_make_tensor_proto_invalid_values_type():
assert str(exception) == "values type should be (list, np.ndarray, scalar), but is tuple"
-def test_make_tensor_proto_invalid_string_2D_array():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=[[values, values], [values, values]],
- shape=None, dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
-def test_make_tensor_proto_invalid_string_reshape():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=[6], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "cannot reshape array of size 1 into shape (6,)"
-def test_make_tensor_proto_invalid_string_reshape_2():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=[2, 3], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
-def test_make_tensor_proto_invalid_string_2D_array_with_shape():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=[[values, values], [values, values]],
- shape=[2, 2], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
def test_make_tensor_proto_invalid_int_reshape():
values = [1, 2, 3]
with pytest.raises(ValueError) as exception_info:
@@ -673,11 +674,3 @@ def test_make_tensor_proto_invalid_empty_list_of_empty_lists_reshape():
make_tensor_proto(values=values, shape=[4, 2], dtype=DataType.DT_INT8)
exception = exception_info.value
assert str(exception) == "cannot reshape array of size 0 into shape (4,2)"
-def test_make_tensor_proto_invalid_dtype_provided():
- values = [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]]
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=None, dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
@@ -20,6 +20,7 @@
import requests
from http import HTTPStatus
from numpy import array, int32, float32, float128
+import numpy as np
from ovmsclient.tfs_compat.protos.tensorflow.core.protobuf.error_codes_pb2 import Code as ErrorCode
from ovmsclient.tfs_compat.base.errors import InvalidInputError, ModelNotFoundError
@@ -57,14 +58,6 @@
"input1": (1, 2, 3)
}, 'model_name', 0, TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
- ({
- "input1": [
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ]
- }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
# (inputs_dict,
@@ -85,11 +78,13 @@
"input1": 5.0,
- "input2": bytes([1, 2, 3])
+ "input2": bytes([1, 2, 3]),
+ "input3": ["list", "of", "strings"]
}, json.dumps({
"inputs": {
"input1": [5.0],
- "input2": [{"b64": "AQID"}]
+ "input2": [{"b64": "AQID"}],
+ "input3": ["list", "of", "strings"]
}), 'model_name', 0),
@@ -119,7 +114,10 @@
([1, 2, 3.0], [1.0, 2.0, 3.0]),
([bytes([1, 2, 3]), bytes([4, 5, 6]), bytes([7, 8, 9])],
- [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}])
+ [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}]),
+ ([[bytes([111, 118]), bytes([109, 115])], [bytes([111, 118]), bytes([109, 115])]],
+ [["ov", "ms"], ["ov", "ms"]])
# (inputs_dict,
@@ -136,23 +134,13 @@
"The requested array has an inhomogeneous shape after 2 dimensions. "
"The detected shape was (3, 1) + inhomogeneous part.")),
- ([1, 2, 3, "str"],
- TypeError, "provided values type is not valid"),
- ([[1, 2], [3, 4], ["five", 6]],
+ (np.array(b'1', dtype=np.dtype(np.void, 10)),
TypeError, "provided values type is not valid"),
(float128(2.5), TypeError, "provided values type is not valid"),
((1, 2, 3), TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
- ([
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ], ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
# (config_dict,
@@ -15,6 +15,8 @@
+ ovms_docs_demo_mediapipe_image_classification
+ ovms_docs_demo_mediapipe_multi_model
@@ -23,6 +25,7 @@
+ ovms_demo_universal-sentence-encoder
@@ -48,6 +51,7 @@ OpenVINO Model Server demos have been created to showcase the usage of the model
|[Real Time Stream Analysis](real_time_stream_analysis/python/README.md)| Analyze RTSP video stream in real time with generic application template for custom pre and post processing routines as well as simple results visualizer for displaying predictions in the browser. |
|[Natural Language Processing with BERT](bert_question_answering/python/README.md)|Provide a knowledge source and a query and use BERT model for question answering use case via gRPC API. This demo uses dynamic shape feature. |
|[GPT-J Causal Language Modeling](gptj_causal_lm/python/README.md)|Write start of the sentence and let GPT-J continue via gRPC API. This demo uses dynamic shape feature. |
+|[Using input data in string format with universal-sentence-encoder model](universal-sentence-encoder/README.md)| Handling an AI model with text as the model input. |
|[Speech Recognition on Kaldi Model](speech_recognition_with_kaldi_model/python/README.md)|Run inference on a speech sample and use Kaldi model to perform speech recognition via gRPC API. This demo uses [stateful model](../docs/stateful_models.md). |
|[Benchmark App](benchmark/python/README.md)|Generate traffic and measure performance of the model served in OpenVINO Model Server.|
|[Face Blur Pipeline](face_blur/python/README.md)|Detect faces and blur image using a pipeline of object detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [face_blur custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/src/custom_nodes/face_blur). |
@@ -363,6 +363,8 @@ def __fix_shape_and_type(self, input_name):
dtype = tensorflow.dtypes.int32
elif dtype == self.DTYPE_INT_64:
dtype = tensorflow.dtypes.int64
+ elif dtype == self.DTYPE_UINT_8:
+ dtype = tensorflow.dtypes.uint8
else: raise ValueError(f"not supported type: {dtype}")
return shape, dtype
@@ -22,4 +22,4 @@ COPY bert_question_answering.py tokens_bert.py html_reader.py requirements.txt .
ADD https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/bert-small-uncased-whole-word-masking-squad-int8-0002/vocab.txt .
RUN pip3 install --upgrade pip && pip3 install --no-cache-dir -r requirements.txt
-ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/wiki/BERT_(language_model)", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"]
+ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"]
diff --git a/demos/bert_question_answering/python/README.md b/demos/bert_question_answering/python/README.md
index 17b7f46c15..e9fa9905d3 100644
--- a/demos/bert_question_answering/python/README.md
+++ b/demos/bert_question_answering/python/README.md
@@ -31,30 +31,30 @@ docker run -it --network host -e no_proxy=localhost bert-client:latest --grpc_ad
Docker image with BERT client by default start the container with a command:
-python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/wiki/BERT_(language_model)" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids
+python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids
You can change the entrypoint to adjust to different parameters
Example of the output snippet:
question: what is bert
-[ INFO ] Sequence of length 395 is processed with 1.85 requests/sec (0.54 sec per request)
-[ INFO ] Sequence of length 368 is processed with 2.74 requests/sec (0.36 sec per request)
-[ INFO ] Sequence of length 32 is processed with 16.51 requests/sec (0.061 sec per request)
+[ INFO ] Sequence of length 418 is processed with 9.91 requests/sec (0.1 sec per request)
+[ INFO ] Sequence of length 305 is processed with 12.08 requests/sec (0.083 sec per request)
+[ INFO ] Sequence of length 349 is processed with 12.14 requests/sec (0.082 sec per request)
+[ INFO ] Sequence of length 110 is processed with 17.98 requests/sec (0.056 sec per request)
[ INFO ] The performance below is reported only for reference purposes, please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements.
-[ INFO ] 3 requests were processed in 1.00sec (0.33sec per request)
-[ INFO ] ---answer: 0.26 deeply bidirectional, unsupervised language representation
-[ INFO ] When BERT was published, it achieved state-of-the-art performance on a number of natural language understanding tasks:[1]
-The reasons for BERT's state-of-the-art performance on these natural language understanding tasks are not yet well understood.[8][9] Current research has focused on investigating the relationship behind BERT's output as a result of carefully chosen input sequences,[10][11] analysis of internal vector representations through probing classifiers,[12][13] and the relationships represented by attention weights.[8][9]
-BERT has its origins from pre-training contextual representations including semi-supervised sequence learning,[14] generative pre-training, ELMo,[15] and ULMFit.[16] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence.
-On October 25, 2019, Google Search announced that they had started applying BERT models for English language search queries within the US.[17] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[18] In October 2020, almost every single English-based query was processed by BERT.[19]
-[ INFO ] ---answer: 0.22 Bidirectional Encoder Representations from Transformers
-[ INFO ] Bidirectional Encoder Representations from Transformers (BERT) is a transformer-based machine learning technique for natural language processing (NLP) pre-training developed by Google. BERT was created and published in 2018 by Jacob Devlin and his colleagues from Google.[1][2] In 2019, Google announced that it had begun leveraging BERT in its search engine, and by late 2020 it was using BERT in almost every English-language query. A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in NLP experiments", counting over 150 research publications analyzing and improving the model.[3]
-The original English-language BERT has two models:[1] (1) the BERTBASE: 12 encoders with 12 bidirectional self-attention heads, and (2) the BERTLARGE: 24 encoders with 16 bidirectional self-attention heads. Both models are pre-trained from unlabeled data extracted from the BooksCorpus[4] with 800M words and English Wikipedia with 2,500M words.
-BERT is at its core a transformer language model with a variable number of encoder layers and self-attention heads. The architecture is "almost identical" to the original transformer implementation in Vaswani et al. (2017).[5]
-BERT was pretrained on two tasks: language modeling (15% of tokens were masked and BERT was trained to predict them from context) and next sentence prediction (BERT was trained to predict if a chosen next sentence was probable or not given the first sentence). As a result of the training process, BERT learns contextual embeddings for words. After pretraining, which is computationally expensive, BERT can be finetuned with fewer resources on smaller datasets to optimize its performance on specific tasks.[1][6]
-[ INFO ] ---answer: 0.36 The research paper describing BERT
-[ INFO ] The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[20]
+[ INFO ] 4 requests were processed in 0.34sec (0.086sec per request)
+[ INFO ] ---answer: 0.35 Bidirectional Encoder Representations from Transformers
+[ INFO ] This is the current revision of this page, as edited by Mandarax (talk | contribs) at 19:01, 8 April 2023 (Correct capitalization). The present address (URL) is a permanent link to this version.Bidirectional Encoder Representations from Transformers (BERT) is a family of masked-language models introduced in 2018 by researchers at Google.[1][2] A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in Natural Language Processing (NLP) experiments counting over 150 research publications analyzing and improving the model."[3]
+BERT was originally implemented in the English language at two model sizes:[1] (1) BERTBASE: 12 encoders with 12 bidirectional self-attention heads totaling 110 million parameters, and (2) BERTLARGE: 24 encoders with 16 bidirectional self-attention heads totaling 340 million parameters. Both models were pre-trained on the Toronto BookCorpus[4] (800M words) and English Wikipedia (2,500M words).
+BERT is based on the transformer architecture. Specifically, BERT is composed of Transformer encoder layers.
+BERT was pre-trained simultaneously on two tasks: language modeling (15% of tokens were masked, and the training objective was to predict the original token given its context) and next sentence prediction (the training objective was to classify if two spans of text appeared sequentially in the training corpus).[5] As a result of this training process, BERT learns latent representations of words and sentences in context. After pre-training, BERT can be fine-tuned with fewer resources on smaller datasets to optimize its performance on specific tasks such as NLP tasks (language inference, text classification) and sequence-to-sequence based language generation tasks (question-answering, conversational response generation).[1][6] The pre-training stage is significantly more computationally expensive than fine-tuning.
+[ INFO ] ---answer: 0.14 deeply bidirectional, unsupervised language representation
+[ INFO ] In contrast to deep learning neural networks which require very large amounts of data, BERT has already been pre-trained which means that it has learnt the representations of the words and sentences as well as the underlying semantic relations that they are connected with. BERT can then be fine-tuned on smaller datasets for specific tasks such as sentiment classification. The pre-trained models are chosen according to the content of the given dataset one uses but also the goal of the task. For example, if the task is a sentiment classification task on financial data, a pre-trained model for the analysis of sentiment of financial text should be chosen. The weights of the original pre-trained models were released on GitHub.[16]
+BERT was originally published by Google researchers Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. The design has its origins from pre-training contextual representations, including semi-supervised sequence learning,[17] generative pre-training, ELMo,[18] and ULMFit.[19] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence.
+[ INFO ] ---answer: 0.12 BERT models for English language search queries within the US
+[ INFO ] On October 25, 2019, Google announced that they had started applying BERT models for English language search queries within the US.[20] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[21] In October 2020, almost every single English-based query was processed by a BERT model.[22]
+The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[23]
@@ -1,5 +1,5 @@
-# Copyright (c) 2021 Intel Corporation
+# Copyright (c) 2021-2023 Intel Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,11 +23,8 @@
from tokens_bert import text_to_tokens, load_vocab_file, Token
from html_reader import get_paragraphs
-import grpc
import numpy as np
-from tensorflow import make_tensor_proto, make_ndarray
-from tensorflow_serving.apis import predict_pb2
-from tensorflow_serving.apis import prediction_service_pb2_grpc
+import ovmsclient
class ConcatenatedParagraph():
def __init__(self, text="", tokens=[]):
@@ -102,8 +99,7 @@ def main():
args = build_argparser().parse_args()
# create grpc connection
- channel = grpc.insecure_channel("{}:{}".format(args.grpc_address,args.grpc_port))
- stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+ client = ovmsclient.make_grpc_client("{}:{}".format(args.grpc_address,args.grpc_port))
if args.colors:
COLOR_RED = "\033[91m"
@@ -183,23 +179,9 @@ def main():
if len(input_names)>3:
inputs[input_names[3]] = np.arange(input_ids_length, dtype=np.int64)[None,:]
- #print("inputs:",inputs)
- # create grpc prediction request
- request = predict_pb2.PredictRequest()
- request.model_spec.name = args.model_name
- for inp_name in inputs:
- request.inputs[inp_name].CopyFrom(make_tensor_proto(inputs[inp_name], shape=(inputs[inp_name].shape)))
t_start = time.perf_counter()
- result = stub.Predict(request, 10.0) # result includes a dictionary with all model outputs
+ res = client.predict(inputs, args.model_name, timeout=10.0)
t_end = time.perf_counter()
- #print("\nresult:", result)
- res = {}
- for out_name in output_names:
- # print("out_name:",out_name)
- res[out_name] = make_ndarray(result.outputs[out_name])
t_count += 1
log.info("Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)".format(
@@ -1,2 +1,2 @@
@@ -62,6 +62,10 @@ build --cxxopt=-fno-strict-overflow
build --cxxopt=-fno-delete-null-pointer-checks
build --cxxopt=-fwrapv
build --cxxopt=-fstack-protector
+build --cxxopt=-fstack-clash-protection
+build --cxxopt=-Wformat
+build --cxxopt=-Wformat-security
+build --cxxopt=-Werror=format-security
# Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF
# compilation options. It also addresses memory use due to
@@ -101,7 +101,7 @@ RUN bazel build \
@com_github_grpc_grpc//:grpc++ \
@com_google_protobuf//:protobuf_lite \
@org_tensorflow//tensorflow/core:lib \
- @opencv//:opencv
+ @linux_opencv//:opencv
COPY src/ /build/src/
@@ -67,7 +67,7 @@ grpc_extra_deps()
##################### OPEN CV ######################
- name = "opencv",
+ name = "linux_opencv",
build_file = "@//third_party/opencv:BUILD",
path = "/opt/opencv",
@@ -41,7 +41,7 @@ cc_binary(
- "@opencv//:opencv",
+ "@linux_opencv//:opencv",
@@ -57,7 +57,7 @@ cc_binary(
- "@opencv//:opencv",
+ "@linux_opencv//:opencv",
@@ -1,3 +1,4 @@
\ No newline at end of file
@@ -0,0 +1,38 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+.PHONY: default setup clean
+default: setup
+# BlingFire git revision the tokenizer models are downloaded from.
+# Defaults to the master branch when nothing else is provided; pin it for
+# reproducible downloads with `make BLINGFIRE_SHA=<commit sha>`.
+BLINGFIRE_SHA ?= master
+# Prepares ./workspace with custom node libraries, tokenizer models and config.json.
+setup:
+# Build custom node
+	cd ../../../src/custom_nodes/tokenizer && \
+	make BASE_OS=ubuntu
+	mkdir -p workspace/lib && \
+	cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libdetokenizer.so workspace/lib/libdetokenizer.so && \
+	cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libtokenizer.so workspace/lib/libtokenizer.so
+# Prepare tokenization models
+	mkdir -p workspace/tokenizers
+	wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.bin -O workspace/tokenizers/gpt2.bin
+	wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.i2w -O workspace/tokenizers/gpt2.i2w
+# Copy configuration file to workspace directory
+	cp config.json workspace/.
+# Removes everything produced by `setup`; kept as a separate target so that
+# preparing the workspace does not immediately delete it again.
+clean:
+	@rm -rf workspace
\ No newline at end of file
@@ -101,3 +101,58 @@ tensor([[[ 8.4078, 7.2025, 5.1148, ..., -6.6914, -6.7891, -6.6537],
predicted word: a
+# Pipeline mode with server side tokenization and detokenization
+This variant offloads the tokenization and detokenization steps from the client to the server. OVMS can convert a string proto to a `2D U8` tensor and pass the data to the tokenization custom node. This way, tokens for the `gpt-j-6b` model are generated automatically and the response is returned as text instead of a probability vector.
+## Prepare environment
+Use `make` command to prepare custom node libraries, blingfire tokenization models and configuration file.
+Workspace should look as follows:
+tree workspace
+├── config.json
+├── lib
+│ ├── libdetokenizer.so
+│ └── libtokenizer.so
+└── tokenizers
+ ├── gpt2.bin
+ └── gpt2.i2w
+2 directories, 5 files
+Start OVMS with prepared workspace:
+docker run -d --rm -p 9000:9000 \
+ -v $(pwd)/onnx:/onnx:ro \
+ -v $(pwd)/workspace:/workspace:ro \
+ openvino/model_server \
+ --port 9000 \
+ --config_path /workspace/config.json
+Install Tensorflow Serving API package:
+pip install --upgrade pip
+pip install tensorflow-serving-api==2.11.0
+Run example client:
+python3 dag_client.py --url localhost:9000 --model_name my_gpt_pipeline --input "Neurons are fascinating"
+0.5515012741088867 Neurons are fascinating cells that are responsible for the transmission of information from one brain region to another. They are also responsible for the production of hormones and neurotransmitters that are responsible for the regulation of mood, sleep, appetite, and sexual function.
diff --git a/demos/gptj_causal_lm/python/app.py b/demos/gptj_causal_lm/python/app.py
@@ -48,13 +48,11 @@
predicted_token_id = token = torch.argmax(torch.nn.functional.softmax(torch.Tensor(results[0,-1,:]),dim=-1),dim=-1)
word = tokenizer.decode(predicted_token_id)
input_sentence += word
- # print(f"Iteration: {iteration}\nLast predicted token: {predicted_token_id}\nLast latency: {last_latency}s\n{input_sentence}")
print(word, end='', flush=True)
iteration += 1
if predicted_token_id == args['eos_token_id']:
-# split line below to 3 different lines
print(f"Number of iterations: {iteration}")
print(f"First latency: {first_latency}s")
print(f"Last latency: {last_latency}s")
@@ -0,0 +1,123 @@
+ "model_config_list": [
+ {"config": {
+ "name": "gpt",
+ "base_path": "/onnx",
+ "plugin_config": {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": 1}}}
+ ],
+ "pipeline_config_list": [
+ {
+ "name": "my_gpt_pipeline",
+ "inputs": [
+ "texts"
+ ],
+ "nodes": [
+ {
+ "name": "node_1",
+ "type": "custom",
+ "inputs": [
+ {
+ "texts": {
+ "node_name": "request",
+ "data_item": "texts"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "input_ids",
+ "alias": "out"
+ },
+ {
+ "data_item": "attention_mask",
+ "alias": "attention"
+ }
+ ],
+ "library_name": "tokenizer",
+ "params": {
+ "max_ids_arr_length": "4096",
+ "model_path": "/workspace/tokenizers/gpt2.bin"
+ }
+ },
+ {
+ "name": "gpt_node",
+ "model_name": "gpt",
+ "type": "DL model",
+ "inputs": [
+ {
+ "input_ids": {
+ "node_name": "node_1",
+ "data_item": "out"
+ }
+ },
+ {
+ "attention_mask": {
+ "node_name": "node_1",
+ "data_item": "attention"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "logits",
+ "alias": "logits"
+ }
+ ]
+ },
+ {
+ "name": "node_2",
+ "type": "custom",
+ "inputs": [
+ {
+ "logits": {
+ "node_name": "gpt_node",
+ "data_item": "logits"
+ }
+ },
+ {
+ "input_ids": {
+ "node_name": "node_1",
+ "data_item": "out"
+ }
+ },
+ {
+ "attention_mask": {
+ "node_name": "node_1",
+ "data_item": "attention"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "texts",
+ "alias": "texts"
+ }
+ ],
+ "library_name": "detokenizer",
+ "params": {
+ "max_buffer_length": "8192",
+ "model_path": "/workspace/tokenizers/gpt2.i2w"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "autocompletions_string": {
+ "node_name": "node_2",
+ "data_item": "texts"
+ }
+ }
+ ]
+ }
+ ],
+ "custom_node_library_config_list": [
+ {
+ "name": "tokenizer",
+ "base_path": "/workspace/lib/libtokenizer.so"
+ },
+ {
+ "name": "detokenizer",
+ "base_path": "/workspace/lib/libdetokenizer.so"
+ }
+ ]
@@ -0,0 +1,48 @@
+# Copyright 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import time
+import argparse
+import grpc
+import numpy as np
+from tensorflow_serving.apis import predict_pb2
+from tensorflow_serving.apis import prediction_service_pb2_grpc
+from tensorflow import make_tensor_proto, make_ndarray
+# Demo client: repeatedly sends a sentence to the pipeline over the TensorFlow Serving
+# gRPC API and prints the text returned by the server together with the request latency.
+parser = argparse.ArgumentParser(description='Demo for GPT-J causal LM DAG requests using Tensorflow Serving gRPC API')
+parser.add_argument('--input', required=True, help='Beginning of a sentence', type=str)
+parser.add_argument('--url', required=False, help='Url to connect to', type=str, default='localhost:9000')
+parser.add_argument('--model_name', required=False, help='Model name in the serving', type=str, default='my_gpt_pipeline')
+args = vars(parser.parse_args())
+# Unencrypted gRPC channel to the address given by --url.
+channel = grpc.insecure_channel(args['url'])
+stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+input_sentence = args['input']
+# The request object is created once and reused; only the 'texts' input is refreshed per iteration.
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = args['model_name']
+# The loop has no exit condition: it keeps running until the process is interrupted
+# or a call raises.
+while True:
+ # Send the current sentence as a one-element string array under the 'texts' input.
+ predict_request.inputs['texts'].CopyFrom(make_tensor_proto(np.array([input_sentence])))
+ start_time = time.time()
+ # Second positional argument is the call timeout (10.0, presumably seconds).
+ predict_response = stub.Predict(predict_request, 10.0)
+ latency = time.time() - start_time
+ results = make_ndarray(predict_response.outputs['autocompletions_string'])
+ # The first returned element (bytes, decoded as UTF-8) becomes the input of the next iteration.
+ input_sentence = results[0].decode('utf-8')
+ print(latency, input_sentence)
@@ -1,7 +1,7 @@
\ No newline at end of file
@@ -72,7 +72,7 @@ def get_text(output):
def draw_boxes_spotting(frame, result):
output = make_ndarray(result.outputs['boxes'])
- for i in range(0, 100): # there is returned 200 detections for each image in the batch
+ for i in range(0, output.shape[0]): # there is returned a dynamic list of boxes
detection = output[i,:]
if detection[4] > 0.3:
x_min = int(detection[0])
@@ -0,0 +1,82 @@
+# MediaPipe Image Classification Demo {#ovms_docs_demo_mediapipe_image_classification}
+This guide shows how to implement [MediaPipe](../../../docs/mediapipe.md) graph using OVMS.
+Example usage of graph that contains only one model - resnet:
+## Prepare the repository
+Clone the repository and enter mediapipe image_classification directory
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/image_classification
+## Download ResNet50 model
+mkdir -p model/1
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.bin
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.xml
+## Run OpenVINO Model Server
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+## Run the client:
+cd model_server/client/python/kserve-api/samples
+python grpc_infer_resnet.py --model_name resnetMediapipe --grpc_port 9000 --images_numpy_path ../../imgs.npy --transpose_input False \
+  --input_name in --output_name out --labels_numpy_path ../../lbs.npy
+Image data range: 0.0 : 255.0
+Start processing:
+ Model name: resnetMediapipe
+ Iterations: 10
+ Images numpy path: ../../imgs.npy
+ Numpy file shape: (10, 3, 224, 224)
+Iteration 1; Processing time: 14.40 ms; speed 69.46 fps
+imagenet top results in a single batch:
+ 0 airliner 404 ; Correct match.
+Iteration 2; Processing time: 10.72 ms; speed 93.32 fps
+imagenet top results in a single batch:
+ 0 Arctic fox, white fox, Alopex lagopus 279 ; Correct match.
+Iteration 3; Processing time: 9.27 ms; speed 107.83 fps
+imagenet top results in a single batch:
+ 0 bee 309 ; Correct match.
+Iteration 4; Processing time: 8.47 ms; speed 118.02 fps
+imagenet top results in a single batch:
+ 0 golden retriever 207 ; Correct match.
+Iteration 5; Processing time: 9.17 ms; speed 109.03 fps
+imagenet top results in a single batch:
+ 0 gorilla, Gorilla gorilla 366 ; Correct match.
+Iteration 6; Processing time: 8.56 ms; speed 116.78 fps
+imagenet top results in a single batch:
+ 0 magnetic compass 635 ; Correct match.
+Iteration 7; Processing time: 8.39 ms; speed 119.16 fps
+imagenet top results in a single batch:
+ 0 peacock 84 ; Correct match.
+Iteration 8; Processing time: 8.44 ms; speed 118.44 fps
+imagenet top results in a single batch:
+ 0 pelican 144 ; Correct match.
+Iteration 9; Processing time: 8.36 ms; speed 119.55 fps
+imagenet top results in a single batch:
+ 0 snail 113 ; Correct match.
+Iteration 10; Processing time: 9.16 ms; speed 109.19 fps
+imagenet top results in a single batch:
+ 0 zebra 340 ; Correct match.
+processing time for all iterations
+average time: 9.10 ms; average speed: 109.89 fps
+median time: 8.50 ms; median speed: 117.65 fps
+max time: 14.00 ms; min speed: 71.43 fps
+min time: 8.00 ms; max speed: 125.00 fps
+time percentile 90: 10.40 ms; speed percentile 90: 96.15 fps
+time percentile 50: 8.50 ms; speed percentile 50: 117.65 fps
+time standard deviation: 1.76
+time variance: 3.09
+Classification accuracy: 100.00
@@ -0,0 +1,4 @@
+ "model_config_list": [],
+ "mediapipe_config_list": [{"name":"resnetMediapipe"}]
@@ -0,0 +1,45 @@
+# Copyright 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Graph with a single model: packets from stream "in" are passed to the "resnet"
+# servable and the result is published on stream "out".
+input_stream: "in"
+output_stream: "out"
+# Produces the side packet "session" bound to servable "resnet", version "1".
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:session"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "resnet"
+ servable_version: "1"
+ }
+ }
+# Consumes the "session" side packet; stream tag B (graph input "in") is mapped to
+# tensor name "0" and stream tag A (graph output "out") to tensor name "1463".
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:session"
+ input_stream: "B:in"
+ output_stream: "A:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "B"
+ value: "0"
+ }
+ tag_to_output_tensor_names {
+ key: "A"
+ value: "1463"
+ }
+ }
+ }
@@ -0,0 +1,9 @@
+ "model_config_list": [
+ {"config": {
+ "name": "resnet",
+ "base_path": "model"
+ }
+ }
+ ]
@@ -0,0 +1,33 @@
+# MediaPipe Multi Model Demo {#ovms_docs_demo_mediapipe_multi_model}
+This guide shows how to implement [MediaPipe](../../../docs/mediapipe.md) graph using OVMS.
+Example usage:
+## Prepare the repository
+Clone the repository and enter mediapipe multi_model_graph directory
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/multi_model_graph
+## Prepare the dummy and add models
+cp -r ../../../src/test/add_two_inputs_model ./
+cp -r ../../../src/test/dummy ./
+## Run OpenVINO Model Server
+Prepare virtualenv according to [kserve samples readme](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/README.md)
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+## Run the client:
+python mediapipe_multi_model_client.py --grpc_port 9000
+[[ 3. 5. 7. 9. 11. 13. 15. 17. 19. 21.]]
@@ -0,0 +1,4 @@
+ "model_config_list": [],
+ "mediapipe_config_list": [{"name":"dummyAdd"}]
@@ -0,0 +1,79 @@
+# Copyright 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Graph chaining two models: stream "in1" is processed by the "dummy" servable, and
+# its result ("dummy_output") is fed together with stream "in2" into the "add"
+# servable, whose result is published on stream "out".
+input_stream: "in1"
+input_stream: "in2"
+output_stream: "out"
+# Produces the side packet "dummy" bound to servable "dummy", version "1".
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:dummy"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "dummy"
+ servable_version: "1"
+ }
+ }
+# Produces the side packet "add" bound to servable "add", version "1".
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:add"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "add"
+ servable_version: "1"
+ }
+ }
+# Uses the "dummy" session: tag DUMMY_IN (stream "in1") maps to tensor "b",
+# tag DUMMY_OUT (intermediate stream "dummy_output") maps to tensor "a".
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:dummy"
+ input_stream: "DUMMY_IN:in1"
+ output_stream: "DUMMY_OUT:dummy_output"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "DUMMY_IN"
+ value: "b"
+ }
+ tag_to_output_tensor_names {
+ key: "DUMMY_OUT"
+ value: "a"
+ }
+ }
+ }
+# Uses the "add" session: ADD_INPUT1 (stream "dummy_output") maps to tensor "input1",
+# ADD_INPUT2 (stream "in2") maps to tensor "input2", and SUM (graph output "out")
+# maps to tensor "sum".
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:add"
+ input_stream: "ADD_INPUT1:dummy_output"
+ input_stream: "ADD_INPUT2:in2"
+ output_stream: "SUM:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT1"
+ value: "input1"
+ }
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT2"
+ value: "input2"
+ }
+ tag_to_output_tensor_names {
+ key: "SUM"
+ value: "sum"
+ }
+ }
+ }
@@ -0,0 +1,48 @@
+# Copyright (c) 2022 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import datetime
+import argparse
+import tritonclient.grpc as grpcclient
+# Sends one inference request with two FP32 inputs of shape [1,10] to the "dummyAdd"
+# servable over gRPC (tritonclient) and prints the "out" tensor.
+if __name__ == '__main__':
+ # NOTE(review): the description text mentions images, performance statistics and
+ # accuracy, none of which this script handles - it looks copied from another sample.
+ parser = argparse.ArgumentParser(description='Sends requests via KServe gRPC API using images in numpy format. '
+ 'It displays performance statistics and optionally the model accuracy')
+ parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
+ parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
+ args = vars(parser.parse_args())
+ # Server endpoint in "host:port" form.
+ address = "{}:{}".format(args['grpc_address'],args['grpc_port'])
+ triton_client = grpcclient.InferenceServerClient(
+ url=address,
+ verbose=False)
+ # Two inputs named "in1" and "in2", both filled with the values 1..10 as float32.
+ inputs = []
+ inputs.append(grpcclient.InferInput("in1", [1,10], "FP32"))
+ inputs.append(grpcclient.InferInput("in2", [1,10], "FP32"))
+ inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32))
+ inputs[1].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32))
+ # An empty list of requested outputs is passed to infer().
+ outputs = []
+ results = triton_client.infer(
+ model_name= "dummyAdd",
+ inputs=inputs,
+ outputs=outputs)
+ # Read the result tensor named "out" as a numpy array and print it.
+ output = results.as_numpy("out")
+ print('Output:')
+ print(output)
\ No newline at end of file
@@ -0,0 +1,14 @@
+ "model_config_list": [
+ {"config": {
+ "name": "dummy",
+ "base_path": "dummy"
+ }
+ },
+ {"config": {
+ "name": "add",
+ "base_path": "add_two_inputs_model"
+ }
+ }
+ ]
@@ -3,7 +3,7 @@
This guide shows how to implement a model ensemble using the [DAG Scheduler](../../../docs/dag_scheduler.md).
- Let's consider you develop an application to perform image classification. There are many different models that can be used for this task. The goal is to combine results from inferences executed on two different models and calculate argmax to pick the most probable classification label.
-- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2022.2/omz_models_public_googlenet_v2_tf_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_public_resnet_50_tf_resnet_50_tf.html). Additionally, create own model **argmax** to combine and select top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both - googlenet and resnet predictions should run in parallel.
+- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2023.0/omz_models_model_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_model_resnet_50_tf.html#doxid-omz-models-model-resnet-50-tf). Additionally, create own model **argmax** to combine and select top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both - googlenet and resnet predictions should run in parallel.
- Diagram for this pipeline would look like this:

@@ -0,0 +1,108 @@
+# Using inputs data in string format with universal-sentence-encoder model {#ovms_demo_universal-sentence-encoder}
+## Download the model
+In this experiment we are going to use a TensorFlow model from [tfhub.dev](https://tfhub.dev/google/universal-sentence-encoder-multilingual/3).
+curl --create-dir https://storage.googleapis.com/tfhub-modules/google/universal-sentence-encoder-multilingual/3.tar.gz -o universal-sentence-encoder-multilingual/1/3.tar.gz
+tar -xzf universal-sentence-encoder-multilingual/1/3.tar.gz -C universal-sentence-encoder-multilingual/1/
+rm universal-sentence-encoder-multilingual/1/3.tar.gz
+chmod -R 755 universal-sentence-encoder-multilingual
+tree universal-sentence-encoder-multilingual/
+└── 1
+ ├── assets
+ ├── saved_model.pb
+ └── variables
+ ├── variables.data-00000-of-00001
+ └── variables.index
+## Optionally build OVMS with CPU extension library for sentencepiece_tokenizer layer
+Model universal-sentence-encoder-multilingual includes a layer SentencepieceTokenizer which is not supported by OpenVINO at the moment. It can be however implemented using a [CPU extension](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/custom_operations/user_ie_extensions/sentence_piece), which is a dynamic library performing the execution of the model layer.
+The layer SentencepieceTokenizer expects a list of strings on its input. The CPU extension changes the input format to an array with UINT8 precision and a shape `[-1]`. It is a serialized representation of the list of strings in the form of bytes. When this extension is deployed in OpenVINO Model Server, you don't need to worry about the serialization as it is handled internally. The model server accepts the input in a string format and performs the conversion to the OpenVINO requirement transparently.
+The image `openvino/model_server:2023.0` will include ready to use OpenVINO Model Server with the CPU extension. It can be also built from source using the commands:
+git clone https://github.com/openvinotoolkit/model_server
+cd model_server
+make docker_build OV_USE_BINARY=0
+cd ..
+## Start the model server in a container
+When the new docker image is built, you can start the service with a command:
+docker run -d --name ovms -p 9000:9000 -p 8000:8000 -v $(pwd)/universal-sentence-encoder-multilingual:/model openvino/model_server:latest --model_name usem --model_path /model --cpu_extension /ovms/lib/libuser_ov_extensions.so --plugin_config '{"NUM_STREAMS": 1}' --port 9000 --rest_port 8000
+Check the container logs to confirm successful start:
+docker logs ovms
+## Send string data as inference request
+OpenVINO Model Server can accept the input in the form of strings. Below is a code snippet based on the `tensorflow_serving_api` python library:
+data = np.array(["string1", "string1", "string_n"])
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = "my_model"
+# Attach the string array to the request; replace "input_name" with the model's input tensor name.
+predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
+predict_response = prediction_service_stub.Predict(predict_request, 10.0)
+Here is a basic client execution :
+pip install --upgrade pip
+pip install -r model_server/demos/universal-sentence-encoder/requirements.txt
+python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9000 --string "I enjoy taking long walks along the beach with my dog."
+processing time 6.931 ms.
+Output shape (1, 512)
+Output subset [-0.00552395 0.00599533 -0.01480555 0.01098945 -0.09355522 -0.08445048
+ -0.02802683 -0.05219319 -0.0675998 0.03127321 -0.03223499 -0.01282092
+ 0.06131846 0.02626886 -0.00983501 0.00298059 0.00141201 0.03229365
+ 0.06957124 0.01543707]
+The same can be achieved using REST API interface and even a simple `curl` command:
+curl -X POST http://localhost:8000/v1/models/usem:predict \
+-H 'Content-Type: application/json' \
+-d '{"instances": ["dog", "Puppies are nice.", "I enjoy taking long walks along the beach with my dog."]}'
+## Compare results with TFS
+The same client code can be used to send the requests to TensorFlow Serving component. There is full compatibility in the API.
+Start TFS container:
+docker run -it -p 8500:8500 -p 9500:9500 -v $(pwd)/universal-sentence-encoder-multilingual:/models/usem -e MODEL_NAME=usem tensorflow/serving --port=9500 --rest_api_port=8500
+Run the client
+python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9500 --input_name inputs --output_name outputs --string "I enjoy taking long walks along the beach with my dog."
+processing time 12.167000000000002 ms.
+Output shape (1, 512)
+Output subset [-0.00552387 0.00599531 -0.0148055 0.01098951 -0.09355522 -0.08445048
+ -0.02802679 -0.05219323 -0.06759984 0.03127313 -0.03223493 -0.01282088
+ 0.06131843 0.02626882 -0.00983502 0.00298053 0.00141208 0.03229369
+ 0.06957125 0.01543701]
@@ -0,0 +1,2 @@
@@ -0,0 +1,50 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import grpc
+import numpy as np
+from tensorflow import make_tensor_proto, make_ndarray, make_tensor_proto
+import datetime
+import argparse
+from tensorflow_serving.apis import predict_pb2
+from tensorflow_serving.apis import prediction_service_pb2_grpc
+# Sends a single string to a served model over the TensorFlow Serving gRPC API and
+# prints the processing time, the output shape and the first 20 values of the first output row.
+parser = argparse.ArgumentParser(description='Do requests to ie_serving and tf_serving using images in string format')
+parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
+parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
+parser.add_argument('--input_name',required=False, default='inputs', help='Specify input tensor name. default: inputs')
+parser.add_argument('--output_name',required=False, default='outputs', help='Specify output name. default: outputs')
+parser.add_argument('--model_name', default='usem', help='Define model name, must be same as is in service. default: usem')
+parser.add_argument('--string',required=True, default='', help='String to query.')
+args = vars(parser.parse_args())
+# Unencrypted gRPC channel to <grpc_address>:<grpc_port>.
+channel = grpc.insecure_channel("{}:{}".format(args['grpc_address'],args['grpc_port']))
+stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+# The query is sent as a one-element array of strings.
+data = np.array([args['string']])
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = args['model_name']
+# Attach the string array under the tensor name given by --input_name; without this
+# the request would be sent with no inputs at all.
+predict_request.inputs[args['input_name']].CopyFrom(make_tensor_proto(data))
+start_time = datetime.datetime.now()
+# Second positional argument is the call timeout (10.0, presumably seconds).
+predict_response = stub.Predict(predict_request, 10.0)
+end_time = datetime.datetime.now()
+# Elapsed wall-clock time of the Predict call, in milliseconds.
+duration = (end_time - start_time).total_seconds() * 1000
+print("processing time", duration, "ms.")
+output = make_ndarray(predict_response.outputs[args['output_name']])
+print("Output shape", output.shape)
+print("Output subset", output[0, :20])
@@ -7,7 +7,7 @@
"layout": "NHWC:NCHW",
"shape": "(1,224,224,3)",
"plugin_config": {
+ "NUM_STREAMS": "1"
diff --git a/docs/accelerators.md b/docs/accelerators.md
@@ -11,63 +11,15 @@ docker run -u $(id -u):$(id -g) -v ${PWD}/models:/models:rw openvino/ubuntu20_de
mv ${PWD}/models/public/resnet-50-tf/FP32 ${PWD}/models/public/resnet-50-tf/1
-## Starting the server with the Intel® Neural Compute Stick 2
-[Intel Movidius Neural Compute Stick 2](https://software.intel.com/en-us/neural-compute-stick) can be employed by OVMS OpenVINO Model Server via
-[the MYRIAD plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html). It must be visible and accessible on the host machine.
-NCS devices should be reported by the `lsusb` command, printing out `ID 03e7:2485`.
-To start the server with Neural Compute Stick use either of the two options:
-1. Recommended, without the docker privileged mode and mounting only the usb devices.
-.. code-block:: sh
- docker run --rm -it -u 0 --device-cgroup-rule='c 189:* rmw' -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev/bus/usb:/dev/bus/usb -p 9001:9001 openvino/model_server \
- --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD
-2. Less securely, in the docker privileged mode and mounting all devices.
- ```bash
- docker run --rm -it --net=host -u root --privileged -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev:/dev -p 9001:9001 openvino/model_server \
- --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD
- ```
-## Starting a Docker Container with HDDL
-To run a container that is using the HDDL accelerator, _hddldaemon_ must be running on the host machine.
-You must set up the environment (the OpenVINO package must be pre-installed) and start _hddldaemon_ on the host before starting a container.
-Refer to the steps from [OpenVINO installation guides](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_docker_linux.html#running-the-image-on-intel-vision-accelerator-design-with-intel-movidius-vpus).
-An example of a command starting a server with HDDL:
-# --device=/dev/ion:/dev/ion mounts the accelerator device
-# -v /var/tmp:/var/tmp enables communication with _hddldaemon_ running on the host machine
-docker run --rm -it --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest \
---model_path /opt/model --model_name resnet --port 9001 --target_device HDDL
-Check out our recommendations for [throughput optimization on HDDL](performance_tuning.md).
-> **NOTE**:
-> the OpenVINO Model Server process within the container communicates with _hddldaemon_ via unix sockets in the `/var/tmp` folder.
-> It requires RW permissions in the docker container security context.
-> It is recommended to start the docker container in the same context as the account starting _hddldaemon_. For example, if you start the _hddldaemon_ as root, add `--user root` to the `docker run` command.
## Starting a Docker Container with Intel integrated GPU, Intel® Data Center GPU Flex Series and Intel® Arc™ GPU
-The [GPU plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for
+The [GPU plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for
Deep Neural Networks ([clDNN](https://01.org/cldnn)) to infer deep neural networks. For inference execution, it employs Intel® Processor Graphics including
Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
Before using GPU as OpenVINO Model Server target device, you need to:
-- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices)
+- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices)
- start the docker container with the additional parameter of `--device /dev/dri` to pass the device context
- set the parameter of `--target_device` to `GPU`.
- use the `openvino/model_server:latest-gpu` image, which contains GPU dependencies
@@ -107,7 +59,7 @@ Use device `/dev/dxg` instead of `/dev/dri` and mount the volume `/usr/lib/wsl`:
.. code-block:: sh
- docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \
+ docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl -u $(id -u):$(id -g) \
-v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest-gpu \
--model_path /opt/model --model_name resnet --port 9001 --target_device GPU
@@ -121,7 +73,7 @@ If you need to build the OpenVINO Model Server with different driver version, re
## Using Multi-Device Plugin
If you have multiple inference devices available (e.g. Myriad VPUs and CPU) you can increase inference throughput by enabling the Multi-Device Plugin.
-It distributes Inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Running_on_multiple_devices.html) documentation.
+It distributes inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Running_on_multiple_devices.html).
To use this feature in OpenVINO Model Server, you can choose one of two ways:
@@ -136,7 +88,7 @@ echo '{"model_config_list": [
"name": "resnet",
"base_path": "/opt/model",
"batch_size": "1",
- "target_device": "MULTI:MYRIAD,CPU"}
+ "target_device": "MULTI:GPU,CPU"}
}' >> models/public/resnet-50-tf/config.json
@@ -153,7 +105,7 @@ openvino/model_server:latest --config_path /opt/model/config.json --port 9001
docker run -d --net=host -u root --privileged --name ie-serving --rm -v ${PWD}/models/public/resnet-50-tf/:/opt/model:ro -v \
-/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:MYRIAD,CPU'
+/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:GPU,CPU'
The deployed model will perform inference on both Intel Movidius Neural Compute Stick and CPU.
@@ -161,7 +113,7 @@ The total throughput will be roughly equal to the sum of CPU and Intel Movidius
## Using Heterogeneous Plugin
-The [HETERO plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model
+The [HETERO plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model
among several computing devices. That way different parts of the deep learning network can be executed by devices best suited to their type of calculations.
OpenVINO automatically divides the network to optimize the process.
@@ -183,7 +135,7 @@ echo '{"model_config_list": [
## Using AUTO Plugin
-[Auto Device](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit, it doesn’t bind to a specific type of HW device.
+[Auto Device](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit, it doesn’t bind to a specific type of HW device.
AUTO solves the complexity in application required to code a logic for the HW device selection (through HW devices) and then, on the deducing the best optimization settings on that device.
AUTO always chooses the best device, if compiling model fails on this device, AUTO will try to compile it on next best device until one of them succeeds.
Make sure you have passed the devices and access to the devices you want to use in for the docker image. For example with:
@@ -243,16 +195,22 @@ The docker image of OpenVINO Model Server including support for NVIDIA can be bu
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
- make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3
+ make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master
+ cd ..
Check also [building from sources](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md).
Example command to run container with NVIDIA support:
- docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA
+ docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device NVIDIA
+For models with layers not supported on NVIDIA plugin, you can use a virtual plugin `HETERO` which can use multiple devices listed after the colon:
+ docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device HETERO:NVIDIA,CPU
Check the supported [configuration parameters](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-configuration-parameters) and [supported layers](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-layers-and-limitations)
-Currently the AUTO, MULTI and HETERO virual plugins do not support NVIDIA plugin as an alternative device.
\ No newline at end of file
+Currently the AUTO and MULTI virtual plugins do not support NVIDIA plugin as an alternative device.
\ No newline at end of file
+++ b/docs/advanced_topics.md
@@ -19,7 +19,7 @@ Implement any CPU layer, that is not support by OpenVINO yet, as a shared librar
[Learn more](../src/example/SampleCpuExtension/README.md)
## Model Cache
-Leverage the OpenVINO [model caching](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device.
+Leverage the OpenVINO [model caching](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device.
[Learn more](model_cache.md)
+++ b/docs/binary_input.md
@@ -1,4 +1,4 @@
-# Support for Binary Input Data {#ovms_docs_binary_input}
+# Support for Binary Encoded Image Input Data {#ovms_docs_binary_input}
@@ -20,4 +20,4 @@ automatically from JPEG/PNG to OpenVINO friendly format using built-in [OpenCV](
- [TensorFlow Serving API](./binary_input_tfs.md)
- [KServe API](./binary_input_kfs.md)
- It's worth noting that with KServe API, you can also send raw data (that does not require processing by OpenCV) in binary form via REST. This makes KServe API more performant choice while working with REST interface. The guide linked above explains how to work with both regular data in binary format as well as JPEG/PNG encoded images.
\ No newline at end of file
+It's worth noting that with KServe API, you can also send raw data with or without image encoding via REST API. This makes KServe REST API a more performant choice compared to the JSON format in TFS API. The guide linked above explains how to work with both regular data in binary format as well as JPEG/PNG encoded images.
\ No newline at end of file
+++ b/docs/binary_input_kfs.md
@@ -4,21 +4,27 @@
KServe API allows sending the model input data in a variety of formats inside the [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data-1) objects or in `raw_input_contents` field of [ModelInferRequest](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
-When the data is sent in the `bytes_contents` field of `InferTensorContents` and input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. The `BYTES` datatype is dedicated to binary encoded **images** and if it's set, the data **must** be placed in `bytes_contents` or in `raw_input_contents` if batch size is equal to 1.
+When the data is sent to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions and the input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. Data of such inputs **must** be placed in `bytes_contents` or in `raw_input_contents`.
+If data is located in `raw_input_contents`, you need to precede the data of every image in the batch by 4 bytes (little endian) containing the size of that image. For example, if the batch contains three images of sizes 370, 480 and 500 bytes, the content of `raw_input_contents[index_of_the_input]` would look like this:
+<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image>
Note, that while the model metadata reports the inputs shape with layout `NHWC`, the binary data must be sent with
shape: `[N]` with datatype: `BYTES`. Where `N` represents number of images converted to string bytes.
-When sending data in the array format, the shape and datatype gives information on how to interpret bytes in the contents. For binary encoded data, the only information given by the `shape` field is the amount of images in the batch. On the server side, the bytes in each element of the `bytes_contents` field are loaded, resized to match model input shape and converted to the OpenVINO-friendly array format by OpenCV.
+When sending data in the array format, the shape and datatype gives information on how to interpret bytes in the contents. For binary encoded data, the only information given by the `shape` field is the amount of images in the batch. On the server side, the bytes of every batch are loaded, resized to match model input shape and converted to the OpenVINO-friendly array format by OpenCV.
### JPEG / PNG encoded images
-KServe API also allows sending binary encoded data via HTTP interface. The tensor binary data is provided in the request body, after JSON object. While the JSON part contains information required to route the data to the target model and run inference properly, the data itself, in the binary format is placed right after the JSON.
+KServe API also allows sending encoded images via HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Similar to the GRPC input, the `datatype` of such input needs to be `BYTES`. The tensor binary data is provided in the request body, after the JSON object. While the JSON part contains information required to route the data to the target model and run inference properly, the data itself, in the binary format, is placed right after the JSON. Therefore, you need to precede the data of every image by 4 bytes (little endian) containing the size of this image and specify their combined size in the `binary_data_size` parameter.
+For binary inputs, the `parameters` map in the JSON part contains a `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there are no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. To send a batch of images, you need to precede the data of every image in the batch by 4 bytes (little endian) containing the size of that image and specify their combined size in `binary_data_size`. For example, if the batch contains three images of sizes 370, 480 and 500 bytes, the content of the input buffer inside the binary extension would look like this:
+<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image>
+And in that case binary_data_size would be 1350(370 + 480 + 500)
+Function set_data_from_numpy in triton client lib that we use in our [REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py) automatically converts given images to this format.
-For binary inputs, the `parameters` map in the JSON part contains `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there's no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. Therefore, to send a batch of different images, specify their sizes in `binary_data_size` field as a list with sizes of all images in the batch.
-The list must be formed as a string, so for example, for 3 images in the batch, you may pass - `"9821,12302,7889"`.
If the request contains only one input `binary_data_size` parameter can be omitted - in this case whole buffer is treated as a input image.
For HTTP request headers, `Inference-Header-Content-Length` header must be provided to give the length of the JSON object, and `Content-Length` continues to give the full body length (as HTTP requires). See an extended example with the request headers, and multiple images in the batch:
@@ -42,7 +48,7 @@ For the Raw Data binary inputs `binary_data_size` parameter can be omitted since
## Usage examples
-Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))
+Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/grpc_infer_binary_resnet.py))
Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/README.md))
diff --git a/docs/binary_input_tfs.md b/docs/binary_input_tfs.md
index c2475e93e5..9d565e415f 100644
--- a/docs/binary_input_tfs.md
+++ b/docs/binary_input_tfs.md
@@ -5,7 +5,7 @@
TensorFlow Serving API allows sending the model input data in a variety of formats inside the [TensorProto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) objects.
Array data is passed inside the `tensor_content` field, which represents the input data buffer.
-When the data is sent in the `string_val` field, such input is interpreted as a binary encoded image.
+When the data is sent in the `string_val` field to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions, such input is interpreted as a binary encoded image.
Note, that while the model metadata reports the inputs shape with layout NHWC, the binary data must be sent with
shape: [N] with dtype: DT_STRING. Where N represents number of images converted to string bytes.
@@ -14,7 +14,7 @@ When sending data in the array format, all bytes are in the same sequence in `te
-TensorFlow Serving API also allows sending binary encoded data via HTTP interface. The binary data needs to be Base64 encoded and put into `inputs` or `instances` field as a map in form:
+TensorFlow Serving API also allows sending encoded images via HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. The binary data needs to be Base64 encoded and put into `inputs` or `instances` field as a map in form:
: {"b64":}
diff --git a/docs/build_from_source.md b/docs/build_from_source.md
@@ -5,7 +5,7 @@ This document gives information how to build docker images and the binary packag
## Prerequisites
1. [Docker Engine](https://docs.docker.com/engine/)
-1. Ubuntu 20.04 or RedHat 8.7 host
+1. Ubuntu 20.04, Ubuntu 22.04 or RedHat 8.7 host
1. make
1. bash
@@ -14,8 +14,8 @@ This document gives information how to build docker images and the binary packag
Makefile located in root directory of this repository contains all targets needed to build docker images and binary packages.
It contains `docker_build` target which by default builds multiple docker images:
-- `openvino/model_server:latest` - smallest release image containing only neccessary files to run model server on CPU, NCS and HDDL
-- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU
+- `openvino/model_server:latest` - smallest release image containing only necessary files to run model server on CPU
+- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU and CPU
- `openvino/model_server:latest-nginx-mtls` - release image containing examplary NGINX MTLS configuration
- `openvino/model_server-build:latest` - image with builder environment containing all the tools to build OVMS
@@ -58,6 +58,16 @@ Select base OS:
make docker_build BASE_OS=redhat
+Select ubuntu base image version:
+- `20.04` ubuntu:20.04 (default value)
+- `22.04` ubuntu:22.04
+make docker_build BASE_OS_TAG_UBUNTU=22.04
@@ -67,8 +77,9 @@ Example:
Parameter used to control which GPU driver version will be installed. Supported versions:
| OS | Versions |
-| Ubuntu | 22.35.24055 (default), 22.10.22597, 21.48.21782, 20.35.17767 |
-| RedHat | 22.28.23726 (default), 22.10.22597, 21.38.21026, 20.35.17767 |
+| Ubuntu22 | 23.13.26032 (default), 22.35.24055, 22.10.22597, 21.48.21782 |
+| Ubuntu20 | 22.43.24595 (default), 22.35.24055, 22.10.22597, 21.48.21782 |
+| RedHat | 22.43.24595 (default), 22.28.23726, 22.10.22597, 21.38.21026 |
Additionally it is possible to specify custom (pre-production) drivers by providing location to NEO Runtime packages on local disk. Contact Intel representative to get the access to the pre-production drivers.
Warning: _Maintained only for Ubuntu base OS._
@@ -111,23 +122,64 @@ Use together with `OV_CONTRIB_BRANCH` to specify which branch from [OpenVINO con
-make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3
-docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA
+make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master
+Note. In order to build the image with redhat UBI8.7 as the base os, it is required to use a host with RedHat subscription and entitlements in `/etc/pki/entitlement` and `/etc/rhsm`.
+That is required to install several building dependencies.
By default set to `1`. When set to `0`, OpenVINO will be built from sources and `DLDT_PACKAGE_URL` will be omitted.
-Use `OV_SOURCE_BRANCH` to select [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch. By default `master` will be used.
-Warning: _Maintained only for Ubuntu base OS._
+Use `OV_SOURCE_BRANCH` and `OV_SOURCE_ORG` to select [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch and fork. By default `master` will be used and org `openvinotoolkit`.
+### `RUN_TESTS`
+Enables or disables unit tests execution as part of the docker image building.
+- `0` Unit tests are skipped
+- `1` Unit tests are executed (default)
+make docker_build RUN_TESTS=0
+Running the unit tests will make the building last longer and it will consume a bit more RAM
+Enables or disables calculating the unit tests coverage as part of the docker image building.
+- `0` Checking the coverage is skipped
+- `1` Checking the coverage is included
+make docker_build RUN_TESTS=0
+Running the unit tests will increase build time and consume more RAM
+### `JOBS`
+Number of compilation jobs. By default it is set to the number of CPU cores. On hosts with low RAM, this value can be reduced to avoid out of memory errors during the compilation.
+make docker_build JOBS=2
+When set to `0`, OpenVINO&trade; Model Server will be built with [MediaPipe](mediapipe.md) support. Default value: `0`.
-make docker_build OV_USE_BINARY=0 OV_SOURCE_BRANCH=
+make docker_build MEDIAPIPE_DISABLE=0
Read more detailed usage in [developer guide](https://github.com/openvinotoolkit/model_server/blob/develop/docs/developer_guide.md).
diff --git a/docs/clients_kfs.md b/docs/clients_kfs.md
@@ -331,50 +331,54 @@ When creating a Python-based client application, you can use Triton client libra
.. code-block:: python
- from tritonclient.grpc import service_pb2, service_pb2_grpc
- from tritonclient.utils import *
+ import tritonclient.grpc as grpclient
- client = grpcclient.InferenceServerClient("localhost:9000")
+ triton_client = grpclient.InferenceServerClient(
+ url="address",
+ ssl=False,
+ verbose=False)
image_data = []
with open("image_path", 'rb') as f:
inputs = []
- inputs.append(service_pb2.ModelInferRequest().InferInputTensor())
- inputs[0].name = args['input_name']
- inputs[0].datatype = "BYTES"
- inputs[0].shape.extend([1])
- inputs[0].contents.bytes_contents.append(image_data[0])
+ inputs.append(grpclient.InferInput('input_name', 1, "BYTES"))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
outputs = []
- outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor())
- outputs[0].name = "output_name"
+ outputs.append(grpclient.InferRequestedOutput("output_name"))
- request = service_pb2.ModelInferRequest()
- request.model_name = "model_name'"
- request.inputs.extend(inputs)
- request.outputs.extend(outputs)
- response = grpc_stub.ModelInfer(request)
+ results = triton_client.infer(model_name="model_name",
+ inputs=inputs,
+ outputs=outputs)
.. tab:: python [REST]
.. code-block:: python
- import requests
- import json
- url = f"http://{address}/v2/models/{model_name}/infer"
- http_session = requests.session()
+ import tritonclient.http as httpclient
+ triton_client = httpclient.InferenceServerClient(
+ url="address",
+ ssl=False,
+ ssl_options=None,
+ verbose=False)
image_data = []
- image_binary_size = []
with open("image_path", 'rb') as f:
- image_binary_size.append(len(image_data[-1]))
- image_binary_size_str = ",".join(map(str, image_binary_size))
- inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]}
- inference_header_binary = json.dumps(inference_header).encode()
+ inputs = []
+ inputs.append(httpclient.InferInput('input_name', 1, "BYTES"))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
- results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))})
+ outputs = []
+ outputs.append(httpclient.InferRequestedOutput("output_name"))
+ results = triton_client.infer(model_name="model_name",
+ inputs=inputs,
+ outputs=outputs)
.. tab:: cpp [GRPC]
@@ -387,29 +391,26 @@ When creating a Python-based client application, you can use Triton client libra
std::unique_ptr client;
tc::InferenceServerGrpcClient::Create(&client, "localhost:9000");
- std::vector shape{1, 10};
+ std::vector shape{1};
tc::InferInput* input;
- tc::InferInput::Create(&input, "input_name", shape, "FP32");
+ tc::InferInput::Create(&input, "input_name", shape, "BYTES");
std::shared_ptr input_ptr;
- std::ifstream fileImg("image_path", std::ios::binary);
- fileImg.seekg(0, std::ios::end);
- int bufferLength = fileImg.tellg();
- fileImg.seekg(0, std::ios::beg);
- char* buffer = new char[bufferLength];
- fileImg.read(buffer, bufferLength);
+ std::ifstream file(fileName, std::ios::binary);
+ file.unsetf(std::ios::skipws);
+ std::streampos fileSize;
- std::vector input_data = std::vector(buffer, buffer + bufferLength);
- input_ptr->AppendRaw(input_data);
+ file.seekg(0, std::ios::end);
+ fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
+ input_ptr->AppendFromString({oss.str()});
tc::InferOptions options("model_name");
tc::InferResult* result;
client->Infer(&result, options, inputs);
- input->Reset();
- delete buffer;
.. tab:: cpp [REST]
@@ -425,27 +426,24 @@ When creating a Python-based client application, you can use Triton client libra
std::vector shape{1};
tc::InferInput* input;
- tc::InferInput::Create(&input, input_name, shape, "BYTES");
+ tc::InferInput::Create(&input, "input_name", shape, "BYTES");
std::shared_ptr input_ptr;
- std::ifstream fileImg("image_path", std::ios::binary);
- fileImg.seekg(0, std::ios::end);
- int bufferLength = fileImg.tellg();
- fileImg.seekg(0, std::ios::beg);
- char* buffer = new char[bufferLength];
- fileImg.read(buffer, bufferLength);
+ std::ifstream file(fileName, std::ios::binary);
+ file.unsetf(std::ios::skipws);
+ std::streampos fileSize;
- std::vector input_data = std::vector(buffer, buffer + bufferLength);
- input_ptr->AppendRaw(input_data);
+ file.seekg(0, std::ios::end);
+ fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
+ input_ptr->AppendFromString({oss.str()});
tc::InferOptions options("model_name");
tc::InferResult* result;
client->Infer(&result, options, inputs);
- input->Reset();
- delete buffer;
.. tab:: java
@@ -470,8 +468,10 @@ When creating a Python-based client application, you can use Triton client libra
FileInputStream imageStream = new FileInputStream("image_path");
- request.clearRawInputContents();
- request.addRawInputContents(ByteString.readFrom(imageStream));
+ InferTensorContents.Builder input_data = InferTensorContents.newBuilder();
+ input_data.addBytesContents(ByteString.readFrom(imageStream));
+ input.setContents(input_data);
+ request.addInputs(0, input);
ModelInferResponse response = grpc_stub.modelInfer(request.build());
@@ -488,20 +488,24 @@ When creating a Python-based client application, you can use Triton client libra
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
- inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
- &grpc_client.ModelInferRequest_InferInputTensor{
- Name: "0",
- Datatype: "BYTES",
- Shape: []int64{1},
- },
+ bytes, err := ioutil.ReadFile(fileName)
+ contents := grpc_client.InferTensorContents{}
+ contents.BytesContents = append(contents.BytesContents, bytes)
+ inferInput := grpc_client.ModelInferRequest_InferInputTensor{
+ Name: "0",
+ Datatype: "BYTES",
+ Shape: []int64{1},
+ Contents: &contents,
- bytes, err := ioutil.ReadFile(fileName)
- modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes)
+ inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
+ &inferInput,
+ }
modelInferRequest := grpc_client.ModelInferRequest{
- ModelName: "model_name",
- ModelVersion: "model_version",
+ ModelName: modelName,
+ ModelVersion: modelVersion,
Inputs: inferInputs,
@@ -514,6 +518,9 @@ When creating a Python-based client application, you can use Triton client libra
echo -n '{"inputs” : [{"name" : "0", "shape" : [1], "datatype" : "BYTES"}]}' > request.json
stat --format=%s request.json
+ printf "%x\n" `stat -c "%s" ./image.jpeg`
+ 1c21
+ echo -n -e '\x21\x1c\x00\x00' >> request.json
cat ./image.jpeg >> request.json
curl --data-binary "@./request.json" -X POST http://localhost:8000/v2/models/resnet/versions/0/infer -H "Inference-Header-Content-Length: 66"
@@ -546,7 +553,7 @@ When creating a Python-based client application, you can use Triton client libra
data = np.array([1.0, 2.0, ..., 1000.0])
infer_input = httpclient.InferInput("input_name", data.shape, "FP32")
- results = client.infer("model_name", [infer_input]
+ results = client.infer("model_name", [infer_input])
.. tab:: cpp [GRPC]
@@ -563,7 +570,7 @@ When creating a Python-based client application, you can use Triton client libra
tc::InferInput* input;
tc::InferInput::Create(&input, "input_name", shape, "FP32");
std::shared_ptr input_ptr;
- input_ptr.reset(input)
+ input_ptr.reset(input);
std::vector input_data(10);
for (size_t i = 0; i < 10; ++i) {
@@ -592,7 +599,7 @@ When creating a Python-based client application, you can use Triton client libra
tc::InferInput* input;
tc::InferInput::Create(&input, "input_name", shape, "FP32");
std::shared_ptr input_ptr;
- input_ptr.reset(input)
+ input_ptr.reset(input);
std::vector input_data(10);
for (size_t i = 0; i < 10; ++i) {
@@ -684,4 +691,98 @@ When creating a Python-based client application, you can use Triton client libra
+### Request Prediction on a string
+.. tab:: python [GRPC]
+ .. code-block:: python
+ import numpy as np
+ import tritonclient.grpc as grpcclient
+ client = grpcclient.InferenceServerClient("localhost:9000")
+ data = ""
+ input = np.array([data.encode('utf-8')], dtype=np.object_)
+ infer_input = grpcclient.InferInput("input_name", [1], "BYTES")
+ infer_input.set_data_from_numpy(input)
+ results = client.infer("model_name", [infer_input])
+.. tab:: python [REST]
+ .. code-block:: python
+ import numpy as np
+ import tritonclient.http as httpclient
+ client = httpclient.InferenceServerClient("localhost:9000")
+ data = ""
+ input = np.array([data.encode('utf-8')], dtype=np.object_)
+ infer_input = httpclient.InferInput("input_name", [1], "BYTES")
+ infer_input.set_data_from_numpy(input)
+ results = client.infer("model_name", [infer_input])
+.. tab:: cpp [GRPC]
+ .. code-block:: cpp
+ #include "grpc_client.h"
+ namespace tc = triton::client;
+ int main(int argc, char** argv) {
+ std::unique_ptr client;
+ tc::InferenceServerGrpcClient::Create(&client, "localhost:9000");
+ tc::InferInput* input;
+ tc::InferInput::Create(&input, "input_name", {1}, "BYTES");
+ std::shared_ptr input_ptr;
+ input_ptr.reset(input);
+ input_ptr->AppendFromString({std::string("")});
+ std::vector inputs = {input_ptr.get()};
+ tc::InferOptions options("model_name");
+ tc::InferResult* results;
+ client->Infer(&results, options, inputs);
+ std::shared_ptr results_ptr;
+ results_ptr.reset(results);
+ return 0;
+ }
+.. tab:: cpp [REST]
+ .. code-block:: cpp
+ #include "http_client.h"
+ namespace tc = triton::client;
+ int main(int argc, char** argv) {
+ std::unique_ptr client;
+ tc::InferenceServerHttpClient::Create(&client, "localhost:9000");
+ tc::InferInput* input;
+ tc::InferInput::Create(&input, "input_name", {1}, "BYTES");
+ std::shared_ptr input_ptr;
+ input_ptr.reset(input);
+ input_ptr->AppendFromString({std::string("")});
+ std::vector inputs = {input_ptr.get()};
+ tc::InferOptions options("model_name");
+ tc::InferResult* results;
+ client->Infer(&results, options, inputs);
+ std::shared_ptr results_ptr;
+ results_ptr.reset(results);
+ return 0;
+ }
+.. tab:: curl
+ .. code-block:: sh
+ curl -X POST http://localhost:9000/v2/models/model_name/infer
+ -H 'Content-Type: application/json'
+ -d '{"inputs" : [ {"name" : "input_name", "shape" : [ 1 ], "datatype" : "BYTES", "data" : [""]} ]}'
For complete usage examples see [Kserve samples](https://github.com/openvinotoolkit/model_server/tree/develop/client/python/kserve-api/samples).
diff --git a/docs/clients_tfs.md b/docs/clients_tfs.md
index e65b78b8bd..4e1b07c57a 100644
--- a/docs/clients_tfs.md
+++ b/docs/clients_tfs.md
@@ -285,6 +285,60 @@ When creating a Python-based client application, there are two packages on PyPi
+### Request Prediction on a string
+.. tab:: ovmsclient [GRPC]
+ .. code-block:: python
+ from ovmsclient import make_grpc_client
+ client = make_grpc_client("localhost:9000")
+ data = [""]
+ inputs = {"input_name": data}
+ results = client.predict(inputs=inputs, model_name="my_model")
+.. tab:: ovmsclient [REST]
+ .. code-block:: python
+ from ovmsclient import make_http_client
+ client = make_http_client("localhost:8000")
+ data = [""]
+ inputs = {"input_name": data}
+ results = client.predict(inputs=inputs, model_name="my_model")
+.. tab:: tensorflow-serving-api
+ .. code-block:: python
+ import grpc
+ from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2
+ from tensorflow import make_tensor_proto
+ channel = grpc.insecure_channel("localhost:9000")
+ prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+ data = [""]
+ predict_request = predict_pb2.PredictRequest()
+ predict_request.model_spec.name = "my_model"
+ predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
+ predict_response = prediction_service_stub.Predict(predict_request, 1)
+ results = predict_response.outputs["output_name"]
+.. tab:: curl
+ .. code-block:: sh
+ curl -X POST http://localhost:8000/v1/models/my_model:predict
+ -H 'Content-Type: application/json'
+ -d '{"instances": [{"input_name": ""}]}'
For complete usage examples see [ovmsclient samples](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/client/python/ovmsclient/samples).
diff --git a/docs/custom_node_development.md b/docs/custom_node_development.md
index aba0516bd6..cb7798c531 100644
--- a/docs/custom_node_development.md
+++ b/docs/custom_node_development.md
@@ -123,6 +123,27 @@ Just add include statement like:
#include "opencv2/core.hpp"
+## String support
+There are special considerations when custom nodes handle input sent by clients as strings. Such data, when received by the OVMS frontend, is automatically converted to a 2D array with shape [-1,-1]. An example of a custom node using this feature is our [Tokenizer](https://github.com/openvinotoolkit/model_server/tree/develop/src/custom_nodes/tokenizer).
+### inputs
+When strings are sent to a custom node that has a 2-dimensional shape and U8 precision, OVMS, after receiving a request containing such inputs, converts them to a 2-dimensional U8 array of shape [number of strings, length of the longest string + 1] with padding filled with zeros. For example, a batch of three strings ["String_123", "", "zebra"] would be converted to:
+['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ""
+'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra"
+### outputs
+When the name of a custom node output is suffixed with _string, its shape has 2 dimensions and its precision is U8, OVMS treats the data of such an output as an array that contains a string in every row, with padding filled with zeros, and automatically converts the data of such outputs to strings. For example, the U8 array:
+['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ""
+'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra"
+would be converted to ["String_123", "", "zebra"].
## Building
Custom node library can be compiled using any tool. It is recommended to follow the example based
diff --git a/docs/deploying_server.md b/docs/deploying_server.md
index e52ca2e180..fd60c0560f 100644
--- a/docs/deploying_server.md
+++ b/docs/deploying_server.md
@@ -13,7 +13,7 @@ This is a step-by-step guide on how to deploy OpenVINO™ Model Server on Li
- [Docker Engine](https://docs.docker.com/engine/) installed
- Intel® Core™ processor (6-13th gen.) or Intel® Xeon® processor (1st to 4th gen.)
- Linux, macOS or Windows via [WSL](https://docs.microsoft.com/en-us/windows/wsl/)
-- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_Supported_Devices.html). Accelerators are tested only on bare-metal Linux hosts.
+- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Working_with_devices.html). Accelerators are tested only on bare-metal Linux hosts.
### Launch Model Server Container
@@ -74,37 +74,95 @@ If everything is set up correctly, you will see 'zebra' prediction in the output
## Deploying Model Server on Baremetal (without container)
It is possible to deploy Model Server outside of container.
-To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20 or RHEL8.
-Find latest binary package in [release](https://github.com/openvinotoolkit/model_server/releases) page.
-Alternatively it is possible to build package from source:
+To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20, Ubuntu22 or RHEL8.
-git clone https://github.com/openvinotoolkit/model_server
-cd model_server
-make docker_build
-The `ovms.tar.gz` package will appear in `dist/ubuntu` or `dist/redhat` directory.
-Unpack the package:
+.. tab:: Ubuntu 20.04
-tar -xzvf dist/ubuntu/ovms.tar.gz
+ Download precompiled package:
+ .. code-block:: sh
-Install required libraries depending on the OS.
-For Ubuntu 20.04:
-apt update -y && apt install -y libpugixml1v5 libtbb2
-For RedHat 8.7:
-microdnf install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu20.tar.gz
+ or build it yourself:
+ .. code-block:: sh
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build
+ # Unpack the package
+ tar -xzvf dist/ubuntu/ovms.tar.gz
+ Install required libraries:
+ .. code-block:: sh
+ sudo apt update -y && sudo apt install -y libpugixml1v5 libtbb2
+.. tab:: Ubuntu 22.04
+ Download precompiled package:
+ .. code-block:: sh
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu22.tar.gz
+ or build it yourself:
+ .. code-block:: sh
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build BASE_OS_TAG_UBUNTU=22.04
+ # Unpack the package
+ tar -xzvf dist/ubuntu/ovms.tar.gz
+ Install required libraries:
+ .. code-block:: sh
+ sudo apt update -y && sudo apt install -y libpugixml1v5
+.. tab:: RHEL 8.7
+ Download precompiled package:
+ .. code-block:: sh
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_redhat.tar.gz
+ or build it yourself:
+ .. code-block:: sh
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build BASE_OS=redhat
+ # Unpack the package
+ tar -xzvf dist/redhat/ovms.tar.gz
+ Install required libraries:
+ .. code-block:: sh
+ sudo dnf install -y pkg-config && sudo rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm
Start the server:
wget https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.{xml,bin} -P models/resnet50/1
./ovms/bin/ovms --model_name resnet --model_path models/resnet50
@@ -115,7 +173,7 @@ Learn more about model server [starting parameters](parameters.md).
> **NOTE**:
> When serving models on [AI accelerators](accelerators.md), some additional steps may be required to install device drivers and dependencies.
-> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/latest/openvino_docs_install_guides_configurations_header.html) documentation.
+> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_configurations_header.html) documentation.
## Deploying Model Server in Kubernetes
diff --git a/docs/dynamic_shape_dynamic_model.md b/docs/dynamic_shape_dynamic_model.md
index aae42a41be..8b1c71e5a3 100644
--- a/docs/dynamic_shape_dynamic_model.md
+++ b/docs/dynamic_shape_dynamic_model.md
@@ -8,7 +8,7 @@ Enable dynamic shape by setting the `shape` parameter to range or undefined:
- `--shape "(1,3,200:500,200:500)"` when model is supposed to support height and width values in a range of 200-500. Note that any dimension can support range of values, height and width are only examples here.
> Note that some models do not support dynamic dimensions. Learn more about supported model graph layers including all limitations
-on [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html).
+on [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html).
Another option to use dynamic shape feature is to export the model with dynamic dimension using Model Optimizer. OpenVINO Model Server will inherit the dynamic shape and no additional settings are needed.
diff --git a/docs/features.md b/docs/features.md
index 22e392ae0e..af13d1dee6 100644
--- a/docs/features.md
+++ b/docs/features.md
@@ -8,6 +8,7 @@
+ ovms_docs_text
@@ -16,6 +17,7 @@
+ ovms_docs_mediapipe
diff --git a/docs/mediapipe.md b/docs/mediapipe.md
new file mode 100644
index 0000000000..7d7772824b
--- /dev/null
+++ b/docs/mediapipe.md
@@ -0,0 +1,194 @@
+# Integration with mediapipe (preview) {#ovms_docs_mediapipe}
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+ ovms_docs_mediapipe_calculators
+## Introduction
+MediaPipe is an open-source framework for building pipelines to perform inference over arbitrary sensory data. Using MediaPipe in OVMS enables the user to define a powerful graph from the many ready-made calculators/nodes that come with MediaPipe and that support all the features needed for running a stable graph, e.g. the flow limiter node. The user can also run the graph in a server or run it inside an application host. More information can be found in the [MediaPipe framework](https://developers.google.com/mediapipe/framework/framework_concepts/overview) overview.
+This guide gives information about:
+* How to build OVMS with MediaPipe support
+* OVMS Calculators
+* Graph proto files
+* Configuration files
+* Using the mediapipe graphs
+* Graphs examples
+* Current Limitations
+## How to build OVMS with mediapipe support
+Building OVMS with mediapipe support requires passing an additional flag to the make command, for example:
+MEDIAPIPE_DISABLE=0 make docker_build
+More information about OVMS build parameters can be found [here](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md).
+## Node Types
+"Each calculator is a node of a graph. The bulk of graph execution happens inside its calculators. OVMS has its own calculators but can also use newly developed calculators or reuse the existing calculators defined in the original mediapipe repository."
+For more details you can visit mediapipe concept description - [Calculators Concept Page](https://developers.google.com/mediapipe/framework/framework_concepts/calculators) or OVMS specific calculators implementation - [Ovms Calculators Concept Page](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/src/mediapipe_calculators/calculators.md)
+## Graph proto files
+Graph proto files are used to define a graph. Example content of proto file with graph containing ModelAPICalculator nodes:
+input_stream: "in1"
+input_stream: "in2"
+output_stream: "out"
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:dummy"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "dummy"
+ servable_version: "1"
+ }
+ }
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:add"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "add"
+ servable_version: "1"
+ }
+ }
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:dummy"
+ input_stream: "DUMMY_IN:in1"
+ output_stream: "DUMMY_OUT:dummy_output"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "DUMMY_IN"
+ value: "b"
+ }
+ tag_to_output_tensor_names {
+ key: "DUMMY_OUT"
+ value: "a"
+ }
+ }
+ }
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:add"
+ input_stream: "ADD_INPUT1:dummy_output"
+ input_stream: "ADD_INPUT2:in2"
+ output_stream: "SUM:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT1"
+ value: "input1"
+ }
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT2"
+ value: "input2"
+ }
+ tag_to_output_tensor_names {
+ key: "SUM"
+ value: "sum"
+ }
+ }
+ }
+More information can be found in the [MediaPipe graphs proto](https://developers.google.com/mediapipe/framework/framework_concepts/graphs) documentation.
+## Configuration files
+MediaPipe graph configuration is to be placed in the same json file as the
+[models config file](starting_server.md).
+While models are defined in section `model_config_list`, graphs are configured in
+the `mediapipe_config_list` section.
+Basic graph section template is depicted below:
+ "model_config_list": [...],
+ "mediapipe_config_list": [
+ {
+ "name":"mediaDummy",
+ "base_path":"/mediapipe/graphs/",
+ "graph_path":"graphdummyadapterfull.pbtxt",
+ "subconfig":"subconfig_dummy.json"
+ }
+ ]
+Basic subconfig:
+ "model_config_list": [
+ {"config": {
+ "name": "dummy",
+ "base_path": "/models/dummy",
+ "shape": "(1, 10)"
+ }
+ }
+ ]
+Nodes in the MediaPipe graphs can reference both the models configured in the model_config_list section and those configured in the subconfig.
+### MediaPipe configuration options explained
+|`"name"`|string|Graph identifier related to name field specified in gRPC/REST request|Yes|
+|`"base_path"`|string|Path to which the graph definition and subconfig file paths are relative. May be absolute or relative to the main config path. Default value is "(main config path)\"|No|
+|`"graph_path"`|string|Path to the graph proto file. May be absolute or relative to the base_path. Default value is "(base_path)\graph.pbtxt". The file has to exist.|No|
+|`"subconfig"`|string|Path to the subconfig file. May be absolute or relative to the base_path. Default value is "(base_path)\subconfig.json". Missing file does not result in error.|No|
+Subconfig file may only contain *model_config_list* section - in the same format as in [models config file](starting_server.md).
+## Using Mediapipe
+MediaPipe graphs can use the same KServe Inference API as the models. There are exactly the same calls for running
+the predictions. The request format must match the pipeline definition inputs.
+Graphs can be queried for their state using the calls [GetModelStatus](model_server_grpc_api_kfs.md)
+and [REST Model Status](model_server_rest_api_kfs.md)
+## MediaPipe Graphs Examples
+[Image classification](../demos/mediapipe/image_classification/README.md)
+[Multi model](../demos/mediapipe/multi_model_graph/README.md)
+## Current limitations
+- This is a preview version of the MediaPipe integration, which means that it is not ready to be used in production and only some of the OVMS features are supported for MediaPipe graphs.
+- MediaPipe graphs are supported only for the gRPC KFS API. The only TFS calls supported are get model status and config reload.
+- Binary inputs are not supported for MediaPipe graphs.
+- Public images do not include mediapipe feature.
+- Making changes in the subconfig file does not trigger config reloads. Main config changes are monitored and trigger a subconfig reload even if the subconfig was not changed.
diff --git a/docs/model_cache.md b/docs/model_cache.md
index 00d493a175..d409f948a3 100644
--- a/docs/model_cache.md
+++ b/docs/model_cache.md
@@ -1,7 +1,7 @@
# Model Cache {#ovms_docs_model_cache}
## Overview
-The Model Server can leverage a [OpenVINO™ model cache functionality](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_Model_caching_overview.html), to speed up subsequent model loading on a target device.
+The Model Server can leverage a [OpenVINO™ model cache functionality](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html), to speed up subsequent model loading on a target device.
The cached files make the Model Server initialization usually faster.
The boost depends on a model and a target device. The most noticable improvement will be observed with GPU devices. On other devices, like CPU, it is possible to observe no speed up effect or even slower loading process depending on used model. Test the setup before final deployment.
@@ -42,8 +42,6 @@ In case there are valid reasons to enable the model cache also for models with a
> IMPORTANT: Models imported via the custom loaders never create or use any cache.
-> IMPORTANT: Model cache can't be used with HDDL devices.
## Use case example
### Prepare model
diff --git a/docs/model_server_c_api.md b/docs/model_server_c_api.md
index b62e0411d6..eca9d5b4bc 100644
--- a/docs/model_server_c_api.md
+++ b/docs/model_server_c_api.md
@@ -11,6 +11,12 @@ Server functionalities are encapsulated in shared library built from OVMS source
Calling a method to start the model serving in your application initiates the OVMS as a separate thread. Then you can schedule inference both directly from app using C API and gRPC/HTTP endpoints.
+API is versioned according to [SemVer 2.0](https://semver.org/). By calling `OVMS_ApiVersion` it is possible to get the `major` and `minor` version numbers.
+- major - incremented when new, backward incompatible changes are introduced to the API itself (API call removal, name change, parameter change)
+- minor - incremented when API is modified but backward compatible (new API call added)
+There is no patch version number. Underlying functionality changes not related to API itself are tracked via OVMS version. OVMS and OpenVINO versions can be tracked via logs or `ServerMetadata` request (via KServe API).
### Server configuration and start
To start OVMS you need to create `OVMS_Server` object using `OVMS_ServerNew`, with set of `OVMS_ServerSettings` and `OVMS_ModelsSettings` that describe how the server should be configured. Once the server is started using `OVMS_ServerStartFromConfigurationFile` you can schedule the inferences using `OVMS_Inference`. To stop server, you must call `OVMS_ServerDelete`. While the server is alive you can schedule both in process inferences as well as use gRPC API to schedule inferences from remote machine. Optionally you can also enable HTTP service. Example how to use OVMS with C/C++ application is [here](../demos/c_api_minimal_app/README.md).
diff --git a/docs/model_server_grpc_api_kfs.md b/docs/model_server_grpc_api_kfs.md
index bcd140dca8..378307e0f0 100644
--- a/docs/model_server_grpc_api_kfs.md
+++ b/docs/model_server_grpc_api_kfs.md
@@ -45,9 +45,9 @@ Run inference with requested model or [DAG](./dag_scheduler.md).
Check KServe documentation for more [details](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
-> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) and [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for BF16 data type and there is no support for using FP16 in `InferTensorContents`. In case of sending raw images jpeg files BYTES data type should be used and data should be put in `InferTensorContents`'s `bytes_contents` or `raw_input_contents` for batch size equal to 1.
+> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) or [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for BF16 data type and there is no support for using FP16 in `InferTensorContents`. In case of sending image files or strings, the BYTES data type should be used and data should be put in `InferTensorContents`'s `bytes_contents` or `raw_input_contents`.
-Also, using `BYTES` datatype it is possible to send binary encoded images that would be preprocessed by OVMS using opencv and converted to OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
+Also, using the `BYTES` datatype it is possible to send binary encoded images to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Such images are preprocessed by OVMS using opencv and converted to OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
## See Also
diff --git a/docs/model_server_rest_api_kfs.md b/docs/model_server_rest_api_kfs.md
index 1d5f7b9792..d54c817c45 100644
--- a/docs/model_server_rest_api_kfs.md
+++ b/docs/model_server_rest_api_kfs.md
@@ -225,9 +225,11 @@ $request_output =
-Besides numerical values, it is possible to pass binary inputs using Binary Data extension:
+> Note: In `tensor_data`, elements may be presented in their multi-dimensional representation, or as a flattened one-dimensional representation. Before inference execution, tensor data is flattened and only the element count in `tensor_data` is validated.
-As a JPEG / PNG encoded images - in this case binary encoded data is loaded by OVMS using OpenCV which then converts it to OpenVINO-friendly data format for inference. For encoded inputs datatype `BYTES` is reserved.
+Besides numerical values, it is possible to pass encoded images using Binary Data extension:
+As JPEG / PNG encoded images - in this case binary encoded data is loaded by OVMS using OpenCV which then converts it to OpenVINO-friendly data format for inference. Input is treated as an encoded image when the datatype is `BYTES` and the model or pipeline has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Every batch of the BYTES input needs to be preceded by 4 bytes, little endian, that contain its size.
Content-Type: application/octet-stream
@@ -243,7 +245,7 @@ Content-Length:
-<9472 bytes of data for model_input tensor>
+<0x00250000 (9472 as four bytes little endian)><9472 bytes of data for model_input tensor>
@@ -267,6 +269,8 @@ Content-Length:
<3240000 bytes of the whole data batch for model_input tensor>
+*sending strings inside the binary extension also requires preceding every batch by 4 bytes, little endian, that contain its size.
Check [how binary data is handled in OpenVINO Model Server](./binary_input.md) for more informations.
diff --git a/docs/models_repository.md b/docs/models_repository.md
index 26ee01c7a7..06fe9ea4de 100644
--- a/docs/models_repository.md
+++ b/docs/models_repository.md
@@ -1,19 +1,17 @@
# Preparing a Model Repository {#ovms_docs_models_repository}
The AI models served by OpenVINO™ Model Server must be in either of the four formats:
-- [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files
+- [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files
- [ONNX](https://onnx.ai/), using the .onnx file
- [PaddlePaddle](https://www.paddlepaddle.org.cn/en), using .pdiparams and .pdmodel files
-- [TensorFlow](https://www.tensorflow.org/), using frozen graph format with .pb extension (preview feature)
+- [TensorFlow](https://www.tensorflow.org/), using SavedModel, MetaGraph or frozen Protobuf formats.
To use models trained in other formats you need to convert them first. To do so, use
-OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different
+OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different
[converters](https://onnx.ai/supported-tools.html) for ONNX.
-The feature of direct import of Tensorflow models is currently a preview feature. Currently it supports only the frozen graph and not all topologies can be used that way.
-For unsupported models you can use the Model Optimizer to convert the model to IR format.
The models need to be placed and mounted in a particular directory structure and according to the following rules:
+When the models are hosted on the cloud storage, they should be frozen to be imported successfully.
tree models/
@@ -37,9 +35,15 @@ models/
│ └── 1
│ ├── model.pdiparams
│ └── model.pdmodel
-└── model5
+├── model5
+│ └── 1
+│ ├── model.pdiparams
+│ └── model.pdmodel
+└── model6
└── 1
- └── TF_fronzen_model.pb
+ ├── variables
+ └── saved_model.pb
- Each model should be stored in a dedicated directory, e.g. model1 and model2.
diff --git a/docs/ovms.png b/docs/ovms.png
deleted file mode 100644
index 2e51f20616..0000000000
Binary files a/docs/ovms.png and /dev/null differ
diff --git a/docs/ovms_quickstart.md b/docs/ovms_quickstart.md
index 5d3abace70..70705d069a 100644
--- a/docs/ovms_quickstart.md
+++ b/docs/ovms_quickstart.md
@@ -1,12 +1,12 @@
# Quickstart Guide {#ovms_docs_quick_start_guide}
-OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets)
-, [ONNX](https://onnx.ai/), PaddlePaddle[https://github.com/PaddlePaddle/Paddle] or TensorFlow format [https://www.tensorflow.org/]. You can get them by:
+OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets)
+, [ONNX](https://onnx.ai/), [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) or [TensorFlow](https://www.tensorflow.org/) format. You can get them by:
- downloading models from [Open Model Zoo](https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/)
-- converting other formats using [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
+- converting other formats using [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
-This guide uses a [face detection model](https://docs.openvino.ai/latest/omz_models_model_face_detection_retail_0004.html) in IR format.
+This guide uses a [face detection model](https://docs.openvino.ai/2023.0/omz_models_model_face_detection_retail_0004.html) in IR format.
To quickly start using OpenVINO™ Model Server follow these steps:
1. Prepare Docker
diff --git a/docs/parameters.md b/docs/parameters.md
index 0bfc3545bf..3bff8303f7 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -6,14 +6,14 @@
| Option | Value format | Description |
| `"model_name"/"name"` | `string` | Model name exposed over gRPC and REST API.(use `model_name` in command line, `name` in json config) |
-| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows: `"/opt/ml/models/model"` `"gs://bucket/models/model"` `"s3://bucket/models/model"` `"azure://bucket/models/model"` (use `model_path` in command line, `base_path` in json config) |
-| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model.Some models don't support the reshape operation.If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors.Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html). |
+| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows: `"/opt/ml/models/model"` `"gs://bucket/models/model"` `"s3://bucket/models/model"` `"azure://bucket/models/model"` The path can also be relative to the config.json location (use `model_path` in command line, `base_path` in json config) |
+| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model.Some models don't support the reshape operation.If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors.Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html). |
| `"batch_size"` | `integer/"auto"` | Optional. By default, the batch size is derived from the model, defined through the OpenVINO Model Optimizer. `batch_size` is useful for sequential inference requests of the same batch size.Some models, such as object detection, don't work correctly with the `batch_size` parameter. With these models, the output's first dimension doesn't represent the batch size. You can set the batch size for these models by using network reshaping and setting the `shape` parameter appropriately.The default option of using the Model Optimizer to determine the batch size uses the size of the first dimension in the first input for the size. For example, if the input shape is `(1, 3, 225, 225)`, the batch size is set to `1`. If you set `batch_size` to a numerical value, the model batch size is changed when the service starts.`batch_size` also accepts a value of `auto`. If you use `auto`, then the served model batch size is set according to the incoming data at run time. The model is reloaded each time the input data changes the batch size. You might see a delayed response upon the first request. |
| `"layout" `| `json/string` | `layout` is optional argument which allows to define or change the layout of model input and output tensors. To change the layout (add the transposition step), specify `:`. Example: `NHWC:NCHW` means that user will send input data in `NHWC` layout while the model is in `NCHW` layout.
When specified without colon separator, it doesn't add a transposition but can determine the batch dimension. E.g. `--layout CN` makes prediction service treat second dimension as batch size.
When the model has multiple inputs or the output layout has to be changed, use a json format. Set the mapping, such as: `{"input1":"NHWC:NCHW","input2":"HWN:NHW","output1":"CN:NC"}`.
If not specified, layout is inherited from model.
[Read more](shape_batch_size_and_layout.md#changing-model-inputoutput-layout) |
| `"model_version_policy"` | `json/string` | Optional. The model version policy lets you decide which versions of a model that the OpenVINO Model Server is to serve. By default, the server serves the latest version. One reason to use this argument is to control the server memory consumption.The accepted format is in json or string. Examples: `{"latest": { "num_versions":2 }` `{"specific": { "versions":[1, 3] } }` `{"all": {} }` |
| `"plugin_config"` | `json/string` | List of device plugin parameters. For full list refer to [OpenVINO documentation](https://docs.openvino.ai/2022.3/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html) and [performance tuning guide](./performance_tuning.md). Example: `{"PERFORMANCE_HINT": "LATENCY"}` |
| `"nireq"` | `integer` | The size of internal request queue. When set to 0 or no value is set value is calculated automatically based on available resources.|
-| `"target_device"` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"HDDL"/"GPU"/"MYRIAD"/"MULTI"/"HETERO"` |
+| `"target_device"` | `string` | Device name to be used to execute inference operations. Accepted values are: `"CPU"/"GPU"/"MULTI"/"HETERO"` |
| `"stateful"` | `bool` | If set to true, model is loaded as stateful. |
| `"idle_sequence_cleanup"` | `bool` | If set to true, model will be subject to periodic sequence cleaner scans. See [idle sequence cleanup](stateful_models.md). |
| `"max_sequence_number"` | `uint32` | Determines how many sequences can be handled concurrently by a model instance. |
@@ -21,6 +21,14 @@
| `"metrics_enable"` | `bool` | Flag enabling [metrics](https://docs.openvino.ai/2022.3/ovms_docs_metrics.html) endpoint on rest_port. |
| `"metrics_list"` | `string` | Comma separated list of [metrics](https://docs.openvino.ai/2022.3/ovms_docs_metrics.html). If unset, only default metrics will be enabled.|
+> **Note** : Specifying config_path is mutually exclusive with putting model parameters in the CLI ([serving multiple models](./starting_server.md)).
+| Option | Value format | Description |
+| `config_path` | `string` | Absolute path to json configuration file |
## Server configuration options
Configuration options for the server are defined only via command-line options and determine configuration common for all served models.
@@ -36,9 +44,12 @@ Configuration options for the server are defined only via command-line options a
| `file_system_poll_wait_seconds` | `integer` | Time interval between config and model versions changes detection in seconds. Default value is 1. Zero value disables changes monitoring. |
| `sequence_cleaner_poll_wait_minutes` | `integer` | Time interval (in minutes) between next sequence cleaner scans. Sequences of the models that are subjects to idle sequence cleanup that have been inactive since the last scan are removed. Zero value disables sequence cleaner. See [idle sequence cleanup](stateful_models.md). |
| `custom_node_resources_cleaner_interval_seconds` | `integer` | Time interval (in seconds) between two consecutive resources cleanup scans. Default is 1. Must be greater than 0. See [custom node development](custom_node_development.md). |
-| `cpu_extension` | `string` | Optional path to a library with [custom layers implementation](https://docs.openvino.ai/2022.2/openvino_docs_Extensibility_UG_Intro.html). |
+| `cpu_extension` | `string` | Optional path to a library with [custom layers implementation](https://docs.openvino.ai/2023.0/openvino_docs_Extensibility_UG_Intro.html). |
| `log_level` | `"DEBUG"/"INFO"/"ERROR"` | Serving logging level |
| `log_path` | `string` | Optional path to the log file. |
| `cache_dir` | `string` | Path to the model cache storage. Caching will be enabled if this parameter is defined or the default path /opt/cache exists |
+| `grpc_channel_arguments` | `string` | A comma separated list of arguments to be passed to the grpc server. (e.g. grpc.max_connection_age_ms=2000) |
+| `help` | `NA` | Shows help message and exits |
+| `version` | `NA` | Shows binary version |
diff --git a/docs/performance_tuning.md b/docs/performance_tuning.md
index f7665dfbf9..928b480bbb 100644
--- a/docs/performance_tuning.md
+++ b/docs/performance_tuning.md
@@ -47,7 +47,7 @@ GPU
This mode prioritizes low latency, providing short response time for each inference job. It performs best for tasks where inference is required for a single input image, like a medical analysis of an ultrasound scan image. It also fits the tasks of real-time or nearly real-time applications, such as an industrial robot's response to actions in its environment or obstacle avoidance for autonomous vehicles.
-Note that currently the `PERFORMANCE_HINT` property is supported by CPU and GPU devices only. [More information](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_AUTO.html#performance-hints).
+Note that currently the `PERFORMANCE_HINT` property is supported by CPU and GPU devices only. [More information](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Performance_Hints.html#performance-hints-how-it-works).
To enable Performance Hints for your application, use the following command:
@@ -78,16 +78,17 @@ OpenVINO™ Model Server can be tuned to a single client use case or a high
execution streams. They split the available resources to perform parallel execution of multiple requests.
It is particularly efficient for models which cannot effectively consume all CPU cores or for CPUs with high number of cores.
-By default, number of streams is calculated based on number of available CPUs. It gives a compromise between the single client scenario and the high concurrency.
-If this default configuration is not suitable, adjust it with the `NUM_STREAMS` parameter defined as part
-of the device plugin configuration.
+By default, number of streams is optimized for execution with minimal latency with low concurrency. The number of execution streams will be equal to the number of CPU sockets or GPU cards.
+If that default configuration is not suitable, adjust it with the `NUM_STREAMS` parameter defined as part
+of the device plugin configuration or set the performance hint to `THROUGHPUT`.
-In a scenario where the number of parallel connections is close to 1, set the following parameter:
+In a scenario with a single connection/client, set the following parameter:
`--plugin_config '{"NUM_STREAMS": "1"}'`
-When the number of concurrent requests is higher, increase the number of streams. Make sure, however, that the number of streams is lower than the average volume of concurrent inference operations. Otherwise, the server might not be fully utilized.
-Number of streams should not exceed the number of CPU cores.
+When the number of concurrent requests is high, increase the number of streams. Make sure, however, that the number of streams is lower than the average volume of concurrent inference operations. Otherwise, the server might not be fully utilized.
+Number of streams should not exceed the number of cores.
For example, with ~50 clients sending the requests to the server with 48 cores, set the number of streams to 24:
@@ -120,7 +121,7 @@ In case of using CPU plugin to run the inference, it might be also beneficial to
| NUM_STREAMS | Specifies number of execution streams for the throughput mode |
-> **NOTE:** For additional information about all parameters read [OpenVINO supported plugins](https://docs.openvino.ai/latest/groupov_runtime_cpp_prop_api.html?#detailed-documentation).
+> **NOTE:** For additional information about all parameters read about [OpenVINO device properties](https://docs.openvino.ai/2023.0/groupov_runtime_cpp_prop_api.html?#detailed-documentation).
- Example:
Following docker command will set `NUM_STREAMS` parameter to a value `1`:
@@ -135,7 +136,7 @@ docker run --rm -d --cpuset-cpus 0,1,2,3 -v ${PWD}/models/public/resnet-50-tf:/o
> **NOTE:** Deployment of the OpenVINO Model Server including the autoscaling capability can be automated in Kubernetes and OpenShift using the operator. [Read more about](https://github.com/openvinotoolkit/operator/blob/main/docs/autoscaling.md)
## CPU Power Management Settings
-To save power, the OS can decrease the CPU frequency and increase a volatility of the latency values. Similarly the Intel® Turbo Boost Technology may also affect the stability of results. For best reproducibility, consider locking the frequency to the processor base frequency (refer to the https://ark.intel.com/ for your specific CPU). For example, in Linux setting the relevant values for the /sys/devices/system/cpu/cpu* entries does the trick. [Read more](https://docs.openvino.ai/2022.2/openvino_docs_optimization_guide_dldt_optimization_guide.html). High-level commands like cpupower also exists:
+To save power, the OS can decrease the CPU frequency and increase the volatility of the latency values. Similarly, the Intel® Turbo Boost Technology may also affect the stability of results. For best reproducibility, consider locking the frequency to the processor base frequency (refer to https://ark.intel.com/ for your specific CPU). For example, in Linux, setting the relevant values for the /sys/devices/system/cpu/cpu* entries does the trick. High-level commands like cpupower also exist:
$ cpupower frequency-set --min 3.1GHz
@@ -163,7 +164,7 @@ The default value is 1 second which ensures prompt response to creating new mode
Depending on the device employed to run the inference operation, you can tune the execution behavior with a set of parameters. Each device is handled by its OpenVINO plugin.
-> **NOTE**: For additional information, read [supported configuration parameters for all plugins](https://docs.openvino.ai/latest/groupov_runtime_cpp_prop_api.html?#detailed-documentation).
+> **NOTE**: For additional information, read [supported configuration parameters for all plugins](https://docs.openvino.ai/2023.0/groupov_runtime_cpp_prop_api.html?#detailed-documentation).
Model's plugin configuration is a dictionary of param:value pairs passed to OpenVINO Plugin on network load. It can be set with `plugin_config` parameter.
@@ -178,11 +179,11 @@ docker run --rm -d -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001
## Analyzing performance issues
Recommended steps to investigate achievable performance and discover bottlenecks:
-1. [Launch OV benchmark app](https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html?highlight=benchmark)
+1. [Launch OV benchmark app](https://docs.openvino.ai/2023.0/openvino_inference_engine_tools_benchmark_tool_README.html?highlight=benchmark)
**Note:** It is useful to drop plugin configuration from benchmark app using `-dump_config` and then use the same plugin configuration in model loaded into OVMS
**Note:** When launching benchmark app use `-inference_only=false`. Otherwise OV avoids setting input tensor of inference each time which is not comparable flow to OVMS.
-2. [Launch OVMS benchmark client](https://docs.openvino.ai/latest/ovms_demo_benchmark_client.html) on the same machine as OVMS
-3. [Launch OVMS benchmark client](https://docs.openvino.ai/latest/ovms_demo_benchmark_client.html) from remote machine
+2. [Launch OVMS benchmark client](https://docs.openvino.ai/2023.0/ovms_demo_benchmark_client.html) on the same machine as OVMS
+3. [Launch OVMS benchmark client](https://docs.openvino.ai/2023.0/ovms_demo_benchmark_client.html) from remote machine
4. Measure achievable network bandwidth with tools such as [iperf](https://github.com/esnet/iperf)
diff --git a/docs/security_considerations.md b/docs/security_considerations.md
index 72b8d2dd5e..2424aa979c 100644
--- a/docs/security_considerations.md
+++ b/docs/security_considerations.md
@@ -18,5 +18,5 @@ OpenVINO Model Server currently does not provide access restrictions and traffic
See also:
- [Securing OVMS with NGINX](../extras/nginx-mtls-auth/README.md)
-- [Securing models with OVSA](https://docs.openvino.ai/2022.2/ovsa_get_started.html)
+- [Securing models with OVSA](https://docs.openvino.ai/2023.0/ovsa_get_started.html)
diff --git a/docs/shape_batch_size_and_layout.md b/docs/shape_batch_size_and_layout.md
index 614a267156..90ad6351a2 100644
--- a/docs/shape_batch_size_and_layout.md
+++ b/docs/shape_batch_size_and_layout.md
@@ -28,7 +28,7 @@ it ignores the batch_size value.
- JSON object e.g. `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)"}` - it defines a shape of every included input in the model
*Note:* Some models do not support the reshape operation. Learn more about supported model graph layers including all limitations
-on [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html).
+on [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html).
In case the model can't be reshaped, it will remain in the original parameters and all requests with incompatible input format
will get an error. The model server will also report such problems in the logs.
diff --git a/docs/starting_server.md b/docs/starting_server.md
index d76850d45e..1c23eb6bee 100644
--- a/docs/starting_server.md
+++ b/docs/starting_server.md
@@ -120,9 +120,8 @@ To serve multiple models from the same container you will need an additional JSO
- "shape": "auto",
"nireq": 32,
- "target_device": "HDDL"
+ "target_device": "GPU"
@@ -130,6 +129,8 @@ To serve multiple models from the same container you will need an additional JSO
Once the Docker container has the path to your config file mounted, it can be started. This simplifies the `docker run` command, as arguments are now read from the config file.
+When the `base_path` in the config.json does not start with a cloud URI prefix or the `/` character, the path is treated as relative to the config file location.
+This is helpful when models are distributed together with the config file, because the paths do not need to be adjusted.
## Next Steps
diff --git a/docs/stateful_models.md b/docs/stateful_models.md
index b8cb2c7538..3523f2177c 100644
--- a/docs/stateful_models.md
+++ b/docs/stateful_models.md
@@ -71,7 +71,7 @@ docker run -d -u $(id -u):$(id -g) -v $(pwd)/rm_lstm4f:/models/stateful_model -v
| `stateful` | `bool` | If set to true, model is loaded as stateful. | false |
| `idle_sequence_cleanup` | `bool` | If set to true, model will be subject to periodic sequence cleaner scans. See [idle sequence cleanup](#stateful_cleanup). | true |
| `max_sequence_number` | `uint32` | Determines how many sequences can be handled concurrently by a model instance. | 500 |
-| `low_latency_transformation` | `bool` | If set to true, model server will apply [low latency transformation](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_network_state_intro.html#lowlatency_transformation) on model load. | false |
+| `low_latency_transformation` | `bool` | If set to true, model server will apply [low latency transformation](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_model_state_intro.html#lowlatency-transformations) on model load. | false |
**Note:** Setting `idle_sequence_cleanup`, `max_sequence_number` and `low_latency_transformation` require setting `stateful` to true.
@@ -309,7 +309,7 @@ If set to `true` sequence cleaner will check that model. Otherwise, sequence cle
There are limitations for using stateful models with OVMS:
- Support inference execution only using CPU as the target device.
- - Support Kaldi models with memory layers and non-Kaldi models with Tensor Iterator. See this [docs about stateful networks](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_network_state_intro.html) to learn about stateful networks representation in OpenVINO.
+ - Support Kaldi models with memory layers and non-Kaldi models with Tensor Iterator. See this [docs about stateful networks](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_model_state_intro.html) to learn about stateful networks representation in OpenVINO.
- [Auto batch size and shape](shape_batch_size_and_layout.md) are **not** available in stateful models.
- Stateful model instances **cannot** be used in [DAGs](dag_scheduler.md).
- Requests ordering is guaranteed only when a single client sends subsequent requests in a synchronous manner. Concurrent interaction with the same sequence might negatively affect the accuracy of the results.
diff --git a/docs/text_handling.md b/docs/text_handling.md
new file mode 100644
index 0000000000..66699e62c6
--- /dev/null
+++ b/docs/text_handling.md
@@ -0,0 +1,40 @@
+# Support for text data format {#ovms_docs_text}
+OpenVINO Model Server can now greatly simplify writing the applications with Natural Language Processing models. For the use cases related to text analysis or text generation, the client application can communicate with the model server using the original text format. There is no requirement to perform pre and post processing on the client side. Tokenization and detokenization can be now fully delegated to the server.
+Two situations are addressed: models that require tokens on input or output, and models with an embedded tokenization layer. The use cases demonstrated below use a simple client application sending and receiving text in a string format. The whole complexity of the text conversion is fully delegated to the remote serving endpoint.
+## DAG pipeline to delegate tokenization to the server
+When the model is using tokens on input or output, you can create a DAG pipeline which includes custom nodes performing pre and post processing.
+OpenVINO Model Server can accept the text data format on the gRPC and REST API interface and deserializes it to a 2D array of bytes, where each row represents a single, null-terminated sentence, padded with `\0` to the length of the longest sentence in the batch.
+Example of batch size 2 of the string input - `abcd` and `ab`:
+ 'a', 'b', 'c', 'd', 0,
+ 'a', 'b', 0 , 0 , 0
+Such data in a tensor format can be passed to the custom node to perform the preprocessing like string tokenization. The output of the preprocessing node can be passed to the model.
+There is a built-in [Tokenizer](https://github.com/openvinotoolkit/model_server/tree/develop/src/custom_nodes/tokenizer) custom node for that use case based on Blingfire library.
+Similarly, a custom node can perform string detokenization and return a string to the model server client.
+Check the [end-to-end demonstration](../demos/gptj_causal_lm/python/README.md) of such use case with GPT based text generation.
+The client API snippets with string data format are included in [KServe API](./clients_kfs.md) and [TFS API](./clients_tfs.md).
+## Custom CPU extension for tokenization layer in the model
+Some AI model training frameworks support layers accepting the string format on the input or output. They include layers performing the tokenization operations inside the neural network.
+While OpenVINO doesn't natively support the string data type, it is possible to extend the capabilities with a CPU extension.
+We included in the model server a built-in extension for [SentencepieceTokenizer](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/custom_operations) layer from TensorFlow.
+The extension is capable of converting a 1D U8 OpenVINO tensor into the appropriate format for the `SentencepieceTokenizer` layer. OVMS is able to detect such a layer and create a 1D U8 tensor out of KServe/TensorflowServing API strings automatically.
+A demonstration of such a use case is the MUSE model, which can be imported directly by the model server. The client can send the text data without any preprocessing and take advantage of much faster execution time.
+Check the [MUSE demo](../demos/universal-sentence-encoder/README.md).
diff --git a/docs/tf_model_binary_input.md b/docs/tf_model_binary_input.md
index 47d3f88905..eb3b8b66ae 100644
--- a/docs/tf_model_binary_input.md
+++ b/docs/tf_model_binary_input.md
@@ -4,7 +4,7 @@ This guide shows how to convert TensorFlow models and deploy them with the OpenV
- In this example TensorFlow model [ResNet](https://github.com/tensorflow/models/tree/v2.2.0/official/r1/resnet) will be used.
-- TensorFlow model can be converted into Intermediate Representation format using model_optimizer tool. There are several formats for storing TensorFlow model. In this guide, we present conversion from SavedModel format. More information about conversion process can be found on the [model optimizer documentation](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html#savedmodel_format).
+- TensorFlow model can be converted into Intermediate Representation format using model_optimizer tool. There are several formats for storing TensorFlow model. In this guide, we present conversion from SavedModel format. More information about conversion process can be found in the [model optimizer guide](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_prepare_model_convert_model_tutorials.html).
- Binary input format has several requirements for the model and ovms configuration. More information can be found in [binary inputs documentation](binary_input.md).
## Steps
@@ -29,10 +29,10 @@ docker run -u $(id -u):$(id -g) -v ${PWD}/resnet_v2/:/resnet openvino/ubuntu20_d
*Note:* Some models might require other parameters such as `--scale` parameter.
- `--reverse_input_channels` - required for models that are trained with images in RGB order.
-- `--mean_values` , `--scale` - should be provided if input pre-processing operations are not a part of topology- and the pre-processing relies on the application providing input data. They can be determined in several ways described in [conversion parameters guide](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Converting_Model_General.html). In this example [model pre-processing script](https://github.com/tensorflow/models/blob/v2.2.0/official/r1/resnet/imagenet_preprocessing.py) was used to determine them.
+- `--mean_values` , `--scale` - should be provided if input pre-processing operations are not a part of topology- and the pre-processing relies on the application providing input data. They can be determined in several ways described in [conversion parameters guide](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html). In this example [model pre-processing script](https://github.com/tensorflow/models/blob/v2.2.0/official/r1/resnet/imagenet_preprocessing.py) was used to determine them.
-*Note:* You can find out more about [TensorFlow Model conversion into Intermediate Representation](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html) if your model is stored in other formats.
+*Note:* You can find out more about [TensorFlow Model conversion into Intermediate Representation](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html) if your model is stored in other formats.
This operation will create model files in `${PWD}/resnet_v2/models/resnet/1/` folder.
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
index 9376f5715f..506d94817b 100644
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -102,4 +102,3 @@ HTTP_proxy
- Cache folder (by default `/opt/cache` or defined by `--cache_dir`) should be mounted into docker container with read-write access. Unless changed by the docker run command, the model server has a security context of ovms account with uid 5000.
- The biggest speedup in the model loading time is expected for GPU device. For CPU device the gain will depend on the model topology. In some rare cases, it is possible the load time will not be improved noticeably or it might be even slightly slower.
-- Currently using model cache is not supported with HDDL target device. Do not enable model cache while using HDDL cards.
diff --git a/external/BUILD b/external/BUILD
new file mode 100644
index 0000000000..0f9b9c8882
--- /dev/null
+++ b/external/BUILD
@@ -0,0 +1,50 @@
+# Copyright (c) 2023 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# The tf.patch "tf logging macros" part is changing the logging.h file because it has macro redefinition conflicts
+# with mediapipe logging.h on the following macros and functions connected with them.
+# We are adding glog/logging.h include and the github glog dependency in the tensorflow project
+# so that it uses the same macros from common dependency and we remove the conflicting macros from original file.
+# define LOG(severity) _TF_LOG_##severity
+# define VLOG_IS_ON(lvl)
+# define VLOG(level)
+# define DVLOG(verbose_level)
+# define LOG_EVERY_N(severity, n)
+# define LOG_FIRST_N(severity, n)
+# define CHECK(condition)
+# define CHECK_OP_LOG(name, op, val1, val2)
+# define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2)
+# define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2)
+# define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2)
+# define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2)
+# define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2)
+# define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2)
+# define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2)
+# define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y)
+# define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y)
+# define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y)
+# define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y)
+# define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y)
+ "listen.patch",
+ "tf.patch",
+ "net_http.patch",
\ No newline at end of file
diff --git a/external/mwaitpkg.patch b/external/mwaitpkg.patch
new file mode 100644
index 0000000000..d4c2eb2999
--- /dev/null
+++ b/external/mwaitpkg.patch
@@ -0,0 +1,14 @@
+diff --git a/BUILD.bazel b/BUILD.bazel
+index 7aecb3e0..5c81961e 100644
+--- a/BUILD.bazel
++++ b/BUILD.bazel
+@@ -37,7 +37,7 @@ cc_library(
+ ]),
+ copts = ["-w"] + select({
+ "@bazel_tools//platforms:windows": [""],
+- "//conditions:default": ["-mwaitpkg"],
++ "//conditions:default": [""],
+ }),
+ defines =
+ select({
diff --git a/external/tf.patch b/external/tf.patch
index d144ed97ce..d833d4b4e5 100644
--- a/external/tf.patch
+++ b/external/tf.patch
@@ -37,3 +37,469 @@ index be571aaf1f8..a8765b08bd4 100644
+From c29c6030c78a635541c01bb394a337f0133aa2a0 Mon Sep 17 00:00:00 2001
+From: Rafal Sapala
+Date: Fri, 5 May 2023 13:57:03 +0200
+Subject: [PATCH] tf logging macros
+ tensorflow/core/platform/default/BUILD | 13 ++
+ tensorflow/core/platform/default/log_macros.h | 119 ++++++++++++
+ tensorflow/core/platform/default/logging.cc | 4 -
+ tensorflow/core/platform/default/logging.h | 174 ++----------------
+ 4 files changed, 144 insertions(+), 166 deletions(-)
+ create mode 100644 tensorflow/core/platform/default/log_macros.h
+diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD
+index 0856bb1edce..18f6211e11e 100644
+--- a/tensorflow/core/platform/default/BUILD
++++ b/tensorflow/core/platform/default/BUILD
+@@ -188,6 +188,18 @@ cc_library(
+ ],
+ )
++ name = "log_macros",
++ hdrs = ["log_macros.h"],
++ visibility = ["//visibility:public"],
++ name = "glog",
++ visibility = ["//visibility:public"],
++ deps = ["@com_github_glog_glog//:glog",],
+ cc_library(
+ name = "logging",
+ srcs = ["logging.cc"],
+@@ -199,6 +211,7 @@ cc_library(
+ ],
+ textual_hdrs = ["logging.h"],
+ deps = [
++ ":glog",
+ "//tensorflow/core/platform",
+ "//tensorflow/core/platform:env_time",
+ "//tensorflow/core/platform:macros",
+diff --git a/tensorflow/core/platform/default/log_macros.h b/tensorflow/core/platform/default/log_macros.h
+new file mode 100644
+index 00000000000..eee3803d12c
+--- /dev/null
++++ b/tensorflow/core/platform/default/log_macros.h
+@@ -0,0 +1,119 @@
++#pragma once
++#define LOG(severity) _TF_LOG_##severity
++// An instance of `LOG_EVERY_N` increments a hidden zero-initialized counter
++// every time execution passes through it and logs the specified message when
++// the counter's value is a multiple of `n`, doing nothing otherwise. Each
++// instance has its own counter. The counter's value can be logged by streaming
++// the symbol `COUNTER`. `LOG_EVERY_N` is thread-safe.
++// Example:
++// for (const auto& user : all_users) {
++// LOG_EVERY_N(INFO, 1000) << "Processing user #" << COUNTER;
++// ProcessUser(user);
++// }
++#define LOG_EVERY_N(severity, n) \
++ LOG(severity)
++// CHECK dies with a fatal error if condition is not true. It is *not*
++// controlled by NDEBUG, so the check will be executed regardless of
++// compilation mode. Therefore, it is safe to do things like:
++// CHECK(fp->Write(x) == 4)
++#define CHECK(condition) \
++ if (TF_PREDICT_FALSE(!(condition))) \
++ LOG(FATAL) << "Check failed: " #condition " "
++// `LOG_FIRST_N` behaves like `LOG_EVERY_N` except that the specified message is
++// logged when the counter's value is less than `n`. `LOG_FIRST_N` is
++// thread-safe.
++#define LOG_FIRST_N(severity, n) \
++ LOG(severity)
++// Turn VLOG off when under mobile devices for considerations of binary size.
++#define VLOG_IS_ON(lvl) ((lvl) <= 0)
++// Otherwise, set TF_CPP_MAX_VLOG_LEVEL environment to update minimum log level
++// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual
++// translation units.
++#define VLOG_IS_ON(lvl) \
++ (([](int level, const char* fname) { \
++ static const bool vmodule_activated = \
++ ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \
++ return vmodule_activated; \
++ })(lvl, __FILE__))
++#define VLOG(level) \
++ ? (void)0 \
++ : ::tensorflow::internal::Voidifier() & \
++ ::tensorflow::internal::LogMessage(__FILE__, __LINE__, \
++ tensorflow::INFO)
++// `DVLOG` behaves like `VLOG` in debug mode (i.e. `#ifndef NDEBUG`).
++// Otherwise, it compiles away and does nothing.
++#ifndef NDEBUG
++#define DVLOG VLOG
++#ifndef DVLOG
++#define DVLOG(verbose_level) \
++ while (false && (verbose_level) > 0) ::tensorflow::internal::LogMessageNull()
++// In optimized mode, use CheckOpString to hint to compiler that
++// the while condition is unlikely.
++#define CHECK_OP_LOG(name, op, val1, val2) \
++ while (::tensorflow::internal::CheckOpString _result{ \
++ ::tensorflow::internal::name##Impl( \
++ ::tensorflow::internal::GetReferenceableValue(val1), \
++ ::tensorflow::internal::GetReferenceableValue(val2), \
++ #val1 " " #op " " #val2)}) \
++ ::tensorflow::internal::LogMessageFatal(__FILE__, __LINE__) << *(_result.str_)
++#define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2)
++// CHECK_EQ/NE/...
++#define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2)
++#define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2)
++#define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2)
++#define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2)
++#define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2)
++#define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2)
++#ifndef NDEBUG
++// DCHECK_EQ/NE/...
++#define DCHECK(condition) CHECK(condition)
++#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
++#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
++#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
++#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
++#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
++#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
++#define DCHECK(condition) \
++ while (false && (condition)) LOG(FATAL)
++// NDEBUG is defined, so DCHECK_EQ(x, y) and so on do nothing.
++// However, we still want the compiler to parse x and y, because
++// we don't want to lose potentially useful errors and warnings.
++// _DCHECK_NOP is a helper, and should not be used outside of this file.
++#define _TF_DCHECK_NOP(x, y) \
++ while (false && ((void)(x), (void)(y), 0)) LOG(FATAL)
++#define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y)
++#define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y)
++#define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y)
++#define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y)
++#define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y)
++#define DCHECK_GT(x, y) _TF_DCHECK_NOP(x, y)
+\ No newline at end of file
+diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc
+index df7734f8f35..8aa40fa6b08 100644
+--- a/tensorflow/core/platform/default/logging.cc
++++ b/tensorflow/core/platform/default/logging.cc
+@@ -380,7 +380,6 @@ void LogString(const char* fname, int line, int severity,
+ LogMessage(fname, line, severity) << message;
+ }
+-template <>
+ void MakeCheckOpValueString(std::ostream* os, const char& v) {
+ if (v >= 32 && v <= 126) {
+ (*os) << "'" << v << "'";
+@@ -389,7 +388,6 @@ void MakeCheckOpValueString(std::ostream* os, const char& v) {
+ }
+ }
+-template <>
+ void MakeCheckOpValueString(std::ostream* os, const signed char& v) {
+ if (v >= 32 && v <= 126) {
+ (*os) << "'" << v << "'";
+@@ -398,7 +396,6 @@ void MakeCheckOpValueString(std::ostream* os, const signed char& v) {
+ }
+ }
+-template <>
+ void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) {
+ if (v >= 32 && v <= 126) {
+ (*os) << "'" << v << "'";
+@@ -408,7 +405,6 @@ void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) {
+ }
+ #if LANG_CXX11
+-template <>
+ void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v) {
+ (*os) << "nullptr";
+ }
+diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h
+index 27331b2bcd1..c6d56ce1955 100644
+--- a/tensorflow/core/platform/default/logging.h
++++ b/tensorflow/core/platform/default/logging.h
+@@ -36,9 +36,18 @@ limitations under the License.
+ #include "tensorflow/core/platform/macros.h"
+ #include "tensorflow/core/platform/types.h"
++#pragma GCC diagnostic push
++#pragma GCC diagnostic ignored "-Wsign-compare"
++#include "glog/logging.h"
++#pragma GCC diagnostic pop
+ // TODO(mrry): Prevent this Windows.h #define from leaking out of our headers.
+ #undef ERROR
+ namespace tensorflow {
+ const int INFO = 0; // base_logging::INFO;
+ const int WARNING = 1; // base_logging::WARNING;
+@@ -114,43 +123,6 @@ class LogMessageNull : public std::basic_ostringstream {
+-#define LOG(severity) _TF_LOG_##severity
+-// Turn VLOG off when under mobile devices for considerations of binary size.
+-#define VLOG_IS_ON(lvl) ((lvl) <= 0)
+-// Otherwise, set TF_CPP_MAX_VLOG_LEVEL environment to update minimum log level
+-// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual
+-// translation units.
+-#define VLOG_IS_ON(lvl) \
+- (([](int level, const char* fname) { \
+- static const bool vmodule_activated = \
+- ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \
+- return vmodule_activated; \
+- })(lvl, __FILE__))
+-#define VLOG(level) \
+- ? (void)0 \
+- : ::tensorflow::internal::Voidifier() & \
+- ::tensorflow::internal::LogMessage(__FILE__, __LINE__, \
+- tensorflow::INFO)
+-// `DVLOG` behaves like `VLOG` in debug mode (i.e. `#ifndef NDEBUG`).
+-// Otherwise, it compiles away and does nothing.
+-#ifndef NDEBUG
+-#define DVLOG VLOG
+-#define DVLOG(verbose_level) \
+- while (false && (verbose_level) > 0) ::tensorflow::internal::LogMessageNull()
+ class LogEveryNState {
+ public:
+ bool ShouldLog(int n);
+@@ -217,26 +189,6 @@ class LogEveryNSecState {
+ logging_internal_stateful_condition_do_log; \
+ logging_internal_stateful_condition_do_log = false)
+-// An instance of `LOG_EVERY_N` increments a hidden zero-initialized counter
+-// every time execution passes through it and logs the specified message when
+-// the counter's value is a multiple of `n`, doing nothing otherwise. Each
+-// instance has its own counter. The counter's value can be logged by streaming
+-// the symbol `COUNTER`. `LOG_EVERY_N` is thread-safe.
+-// Example:
+-// for (const auto& user : all_users) {
+-// LOG_EVERY_N(INFO, 1000) << "Processing user #" << COUNTER;
+-// ProcessUser(user);
+-// }
+-#define LOG_EVERY_N(severity, n) \
+- LOG(severity)
+-// `LOG_FIRST_N` behaves like `LOG_EVERY_N` except that the specified message is
+-// logged when the counter's value is less than `n`. `LOG_FIRST_N` is
+-// thread-safe.
+-#define LOG_FIRST_N(severity, n) \
+- LOG(severity)
+ // `LOG_EVERY_POW_2` behaves like `LOG_EVERY_N` except that the specified
+ // message is logged when the counter's value is a power of 2.
+ // `LOG_EVERY_POW_2` is thread-safe.
+@@ -254,13 +206,6 @@ class LogEveryNSecState {
+ LOG(severity)
+-// CHECK dies with a fatal error if condition is not true. It is *not*
+-// controlled by NDEBUG, so the check will be executed regardless of
+-// compilation mode. Therefore, it is safe to do things like:
+-// CHECK(fp->Write(x) == 4)
+-#define CHECK(condition) \
+- if (TF_PREDICT_FALSE(!(condition))) \
+- LOG(FATAL) << "Check failed: " #condition " "
+ // Function is overloaded for integral types to allow static const
+ // integrals declared in classes and not defined to be used as arguments to
+@@ -279,28 +224,6 @@ inline unsigned int GetReferenceableValue(unsigned int t) { return t; }
+ inline int64 GetReferenceableValue(int64 t) { return t; }
+ inline uint64 GetReferenceableValue(uint64 t) { return t; }
+-// This formats a value for a failing CHECK_XX statement. Ordinarily,
+-// it uses the definition for operator<<, with a few special cases below.
+-inline void MakeCheckOpValueString(std::ostream* os, const T& v) {
+- (*os) << v;
+-// Overrides for char types provide readable values for unprintable
+-// characters.
+-template <>
+-void MakeCheckOpValueString(std::ostream* os, const char& v);
+-template <>
+-void MakeCheckOpValueString(std::ostream* os, const signed char& v);
+-template <>
+-void MakeCheckOpValueString(std::ostream* os, const unsigned char& v);
+-#if LANG_CXX11
+-// We need an explicit specialization for std::nullptr_t.
+-template <>
+-void MakeCheckOpValueString(std::ostream* os, const std::nullptr_t& v);
+ // A container for a string pointer which can be evaluated to a bool -
+ // true iff the pointer is non-NULL.
+ struct CheckOpString {
+@@ -311,17 +234,6 @@ struct CheckOpString {
+ string* str_;
+ };
+-// Build the error message string. Specify no inlining for code size.
+-string* MakeCheckOpString(const T1& v1, const T2& v2,
+- const char* exprtext) TF_ATTRIBUTE_NOINLINE;
+-// A helper class for formatting "expr (V1 vs. V2)" in a CHECK_XX
+-// statement. See MakeCheckOpString for sample usage. Other
+-// approaches were considered: use of a template method (e.g.,
+-// base::BuildCheckOpString(exprtext, base::Print, &v1,
+-// base::Print, &v2), however this approach has complications
+-// related to volatile arguments and function-pointer arguments).
+ class CheckOpMessageBuilder {
+ public:
+ // Inserts "exprtext" and " (" to the stream.
+@@ -339,14 +251,6 @@ class CheckOpMessageBuilder {
+ std::ostringstream* stream_;
+ };
+-string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) {
+- CheckOpMessageBuilder comb(exprtext);
+- MakeCheckOpValueString(comb.ForVar1(), v1);
+- MakeCheckOpValueString(comb.ForVar2(), v2);
+- return comb.NewString();
+ // Helper functions for CHECK_OP macro.
+ // The (int, int) specialization works around the issue that the compiler
+ // will not instantiate the template version of the function on values of
+@@ -360,7 +264,7 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) {
+ if (TF_PREDICT_TRUE(v1 op v2)) \
+ return NULL; \
+ else \
+- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \
++ return google::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ inline string* name##Impl(int v1, int v2, const char* exprtext) { \
+ return name##Impl(v1, v2, exprtext); \
+@@ -368,14 +272,14 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) {
+ inline string* name##Impl(const size_t v1, const int v2, \
+ const char* exprtext) { \
+ if (TF_PREDICT_FALSE(v2 < 0)) { \
+- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \
++ return google::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ return name##Impl(v1, v2, exprtext); \
+ } \
+ inline string* name##Impl(const int v1, const size_t v2, \
+ const char* exprtext) { \
+ if (TF_PREDICT_FALSE(v2 >= std::numeric_limits::max())) { \
+- return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \
++ return google::MakeCheckOpString(v1, v2, exprtext); \
+ } \
+ const size_t uval = (size_t)((unsigned)v2); \
+ return name##Impl(v1, uval, exprtext); \
+@@ -394,60 +298,6 @@ TF_DEFINE_CHECK_OP_IMPL(Check_GE, >=)
+-// In optimized mode, use CheckOpString to hint to compiler that
+-// the while condition is unlikely.
+-#define CHECK_OP_LOG(name, op, val1, val2) \
+- while (::tensorflow::internal::CheckOpString _result{ \
+- ::tensorflow::internal::name##Impl( \
+- ::tensorflow::internal::GetReferenceableValue(val1), \
+- ::tensorflow::internal::GetReferenceableValue(val2), \
+- #val1 " " #op " " #val2)}) \
+- ::tensorflow::internal::LogMessageFatal(__FILE__, __LINE__) << *(_result.str_)
+-#define CHECK_OP(name, op, val1, val2) CHECK_OP_LOG(name, op, val1, val2)
+-// CHECK_EQ/NE/...
+-#define CHECK_EQ(val1, val2) CHECK_OP(Check_EQ, ==, val1, val2)
+-#define CHECK_NE(val1, val2) CHECK_OP(Check_NE, !=, val1, val2)
+-#define CHECK_LE(val1, val2) CHECK_OP(Check_LE, <=, val1, val2)
+-#define CHECK_LT(val1, val2) CHECK_OP(Check_LT, <, val1, val2)
+-#define CHECK_GE(val1, val2) CHECK_OP(Check_GE, >=, val1, val2)
+-#define CHECK_GT(val1, val2) CHECK_OP(Check_GT, >, val1, val2)
+-#define CHECK_NOTNULL(val) \
+- ::tensorflow::internal::CheckNotNull(__FILE__, __LINE__, \
+- "'" #val "' Must be non NULL", (val))
+-#ifndef NDEBUG
+-// DCHECK_EQ/NE/...
+-#define DCHECK(condition) CHECK(condition)
+-#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2)
+-#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2)
+-#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2)
+-#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2)
+-#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2)
+-#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2)
+-#define DCHECK(condition) \
+- while (false && (condition)) LOG(FATAL)
+-// NDEBUG is defined, so DCHECK_EQ(x, y) and so on do nothing.
+-// However, we still want the compiler to parse x and y, because
+-// we don't want to lose potentially useful errors and warnings.
+-// _DCHECK_NOP is a helper, and should not be used outside of this file.
+-#define _TF_DCHECK_NOP(x, y) \
+- while (false && ((void)(x), (void)(y), 0)) LOG(FATAL)
+-#define DCHECK_EQ(x, y) _TF_DCHECK_NOP(x, y)
+-#define DCHECK_NE(x, y) _TF_DCHECK_NOP(x, y)
+-#define DCHECK_LE(x, y) _TF_DCHECK_NOP(x, y)
+-#define DCHECK_LT(x, y) _TF_DCHECK_NOP(x, y)
+-#define DCHECK_GE(x, y) _TF_DCHECK_NOP(x, y)
+-#define DCHECK_GT(x, y) _TF_DCHECK_NOP(x, y)
+ // These are for when you don't want a CHECK failure to print a verbose
+ // stack trace. The implementation of CHECK* in this file already doesn't.
+ #define QCHECK(condition) CHECK(condition)
diff --git a/extras/nginx-mtls-auth/Dockerfile.redhat b/extras/nginx-mtls-auth/Dockerfile.redhat
index f27a0ec64c..634f715600 100644
--- a/extras/nginx-mtls-auth/Dockerfile.redhat
+++ b/extras/nginx-mtls-auth/Dockerfile.redhat
@@ -21,7 +21,8 @@ RUN set -e ; \
set -x ; \
mkdir /certs ; \
- microdnf install systemd wget findutils; \
+ if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \
+ $DNF_TOOL install -y systemd wget findutils; \
rpm -Uv http://nginx.org/packages/mainline/centos/8/x86_64/RPMS/nginx-1.21.6-1.el8.ngx.x86_64.rpm ; \
wget -O /usr/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 ; \
chmod +x /usr/bin/dumb-init ; \
diff --git a/package.json b/package.json
new file mode 100644
index 0000000000..bbf7b45b51
--- /dev/null
+++ b/package.json
@@ -0,0 +1,8 @@
+ "name": "medipipe-dev",
+ "version": "0.0.0-alphga",
+ "description": "MediaPipe GitHub repo",
+ "devDependencies": {
+ "@bazel/typescript": "^5.7.1"
+ }
diff --git a/release_files/Dockerfile.redhat b/release_files/Dockerfile.redhat
index 2130922850..5a330c283d 100644
--- a/release_files/Dockerfile.redhat
+++ b/release_files/Dockerfile.redhat
@@ -14,8 +14,8 @@
# limitations under the License.
-ARG BASE_IMAGE=registry.access.redhat.com/ubi8/ubi:8.7
-FROM $BASE_IMAGE as base_build
+ARG BASE_IMAGE=registry.access.redhat.com/ubi8/ubi-minimal:8.7
+FROM registry.access.redhat.com/ubi8/ubi:8.7 as base_build
RUN yum install -y xz && yum clean all
COPY ovms.tar.xz /
@@ -28,40 +28,29 @@ RUN mkdir /licenses && ln -s /ovms/LICENSE /licenses && ln -s /ovms/thirdparty-l
RUN if [ -f /ovms/lib/libovms_shared.so ] ; then rm -rf /ovms/lib/libovms_shared.so ; else exit 0 ; fi ;
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
-FROM registry.access.redhat.com/ubi8/ubi-minimal:8.7 as release
+FROM $BASE_IMAGE as release
LABEL "name"="OVMS"
LABEL "vendor"="Intel Corporation"
-LABEL "version"="2022.3"
-LABEL "release"="2022"
+LABEL "version"="2023.0"
+LABEL "release"="2023"
LABEL "summary"="OpenVINO(TM) Model Server"
LABEL "description"="OpenVINO(TM) Model Server is a solution for serving AI models"
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# hadolint ignore=DL3003,DL3041,SC2164
-RUN microdnf upgrade -y ; \
- microdnf install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \
+RUN if [ -f /usr/bin/dnf ] ; then export DNF_TOOL=dnf ; else export DNF_TOOL=microdnf ; fi ; \
+ $DNF_TOOL upgrade --setopt=install_weak_deps=0 -y ; \
+ $DNF_TOOL install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \
if [ "$GPU" == "1" ] ; then \
- "20.35.17767") \
- mkdir /tmp/gpu_deps ; \
- curl -L --output /tmp/gpu_deps/intel-opencl-20.35.17767-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-opencl-20.35.17767-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/level-zero-1.0.0-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/level-zero-1.0.0-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/level-zero-devel-1.0.0-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/level-zero-devel-1.0.0-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/intel-igc-opencl-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-opencl-1.0.4756-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/intel-igc-opencl-devel-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-opencl-devel-1.0.4756-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/intel-igc-core-1.0.4756-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-igc-core-1.0.4756-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/intel-gmmlib-20.2.4-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-gmmlib-20.2.4-1.el7.x86_64.rpm/download ; \
- curl -L --output /tmp/gpu_deps/intel-gmmlib-devel-20.2.4-1.el7.x86_64.rpm https://sourceforge.net/projects/intel-compute-runtime/files/20.35.17767/centos-7/intel-gmmlib-devel-20.2.4-1.el7.x86_64.rpm/download ; \
- cd /tmp/gpu_deps && rpm -iv *.rpm && rm -Rf /tmp/gpu_deps ; \
- ;; \
"21.38.21026") \
mkdir /tmp/gpu_deps ; \
curl -L --output /tmp/gpu_deps/intel-igc-core-1.0.8708-1.el8.x86_64.rpm https://download.copr.fedorainfracloud.org/results/jdanecki/intel-opencl/centos-stream-8-x86_64/02870435-intel-igc/intel-igc-core-1.0.8708-1.el8.x86_64.rpm ; \
@@ -73,7 +62,7 @@ RUN microdnf upgrade -y ; \
cd /tmp/gpu_deps && rpm -iv *.rpm && rm -Rf /tmp/gpu_deps ; \
;; \
"22.10.22597") \
- microdnf install -y libedit ; \
+ $DNF_TOOL install -y libedit ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-gmmlib-22.0.3-i699.3.el8.x86_64.rpm ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-core-1.0.10409-i699.3.el8.x86_64.rpm ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/level-zero-1.7.9-i699.3.el8.x86_64.rpm ; \
@@ -82,7 +71,7 @@ RUN microdnf upgrade -y ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-opencl-22.10.22597-i699.3.el8.x86_64.rpm ; \
;; \
"22.28.23726") \
- microdnf install -y libedit ; \
+ $DNF_TOOL install -y libedit ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-gmmlib-22.1.7-i419.el8.x86_64.rpm ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-core-1.0.11485-i419.el8.x86_64.rpm ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5/intel-igc-opencl-1.0.11485-i419.el8.x86_64.rpm ; \
@@ -90,6 +79,15 @@ RUN microdnf upgrade -y ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5-devel/intel-level-zero-gpu-1.3.23453-i392.el8.x86_64.rpm ; \
rpm -ivh https://repositories.intel.com/graphics/rhel/8.5-devel/level-zero-1.8.1-i392.el8.x86_64.rpm ; \
;; \
+ "22.43.24595") \
+ $DNF_TOOL install -y libedit ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-gmmlib-22.3.1-i529.el8.x86_64.rpm ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-igc-core-1.0.12504.6-i537.el8.x86_64.rpm ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-igc-opencl-1.0.12504.6-i537.el8.x86_64.rpm ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-opencl-22.43.24595.35-i538.el8.x86_64.rpm ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/intel-level-zero-gpu-1.3.24595.35-i538.el8.x86_64.rpm ; \
+ rpm -ivh https://repositories.intel.com/graphics/rhel/8.6/level-zero-1.8.8-i524.el8.x86_64.rpm ; \
+ ;; \
*) \
echo "ERROR: Unrecognized driver ${INSTALL_DRIVER_VERSION}." ; \
exit 1 ; \
@@ -102,7 +100,7 @@ RUN microdnf upgrade -y ; \
rpm -ivh http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/numactl-2.0.12-11.el8.x86_64.rpm; \
rpm -ivh http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/ocl-icd-2.2.12-1.el8.x86_64.rpm; \
else \
- microdnf install -y tar gzip; \
+ $DNF_TOOL install -y tar gzip; \
mkdir /tmp_ovms ; \
cd /tmp_ovms ; \
curl -L --fail -o deps.tar.xz "$INSTALL_RPMS_FROM_URL" ; \
@@ -112,11 +110,18 @@ RUN microdnf upgrade -y ; \
cd / ; \
rm -rf /tmp_ovms ; \
fi ; \
- microdnf install -y shadow-utils; \
+ $DNF_TOOL install -y shadow-utils; \
cp -v /etc/ssl/certs/ca-bundle.crt /etc/ssl/certs/ca-certificates.crt ; \
groupadd --gid 5000 ovms && groupadd --gid 44 video1 && \
useradd --home-dir /home/ovms --create-home --uid 5000 --gid 5000 --groups 39,44 --shell /bin/bash --skel /dev/null ovms
+# for NVIDIA
+RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; echo "installing cuda yum package"; \
+ dnf install -y \
+ libcudnn8- \
+ libcutensor1- && \
+ dnf clean all
COPY --from=base_build /ovms /ovms
COPY --from=base_build /licenses /licenses
diff --git a/release_files/Dockerfile.ubuntu b/release_files/Dockerfile.ubuntu
index bf1c432403..63d7520cc6 100644
--- a/release_files/Dockerfile.ubuntu
+++ b/release_files/Dockerfile.ubuntu
@@ -73,11 +73,10 @@ FROM $BASE_IMAGE as release
ENV HDDL_INSTALL_DIR=/ovms/lib/hddl
ENV DEBIAN_FRONTEND=noninteractive
SHELL ["/bin/bash", "-c"]
@@ -90,16 +89,6 @@ RUN apt-get update -y ; \
if [ "$GPU" == "1" ] ; then \
apt-get update && apt-get install -y libnuma1 ocl-icd-libopencl1 --no-install-recommends && rm -rf /var/lib/apt/lists/* && \
- "20.35.17767") \
- mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-gmmlib_20.2.4_amd64.deb ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-igc-core_1.0.4756_amd64.deb ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-igc-opencl_1.0.4756_amd64.deb ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-opencl_20.35.17767_amd64.deb ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-ocloc_20.35.17767_amd64.deb ; \
- curl -L -O https://github.com/intel/compute-runtime/releases/download/20.35.17767/intel-level-zero-gpu_1.0.17767_amd64.deb ; \
- dpkg -i intel*.deb && rm -Rf /tmp/gpu_deps ; \
- ;; \
"21.48.21782") \
mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
curl -L -O https://github.com/intel/compute-runtime/releases/download/21.48.21782/intel-gmmlib_21.3.3_amd64.deb ; \
@@ -131,6 +120,28 @@ RUN apt-get update -y ; \
apt-get clean ; \
rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* ; \
;; \
+ "22.43.24595") \
+ apt-get update && apt-get install -y --no-install-recommends gpg gpg-agent && \
+ curl https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+ echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu focal-legacy main' | tee /etc/apt/sources.list.d/intel.gpu.focal.list && \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ intel-opencl-icd=22.43.24595.35+i538~20.04 \
+ intel-level-zero-gpu=1.3.24595.35+i538~20.04 \
+ level-zero=1.8.8+i524~u20.04 && \
+ apt-get purge gpg gpg-agent --yes && apt-get --yes autoremove && \
+ apt-get clean ; \
+ rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* ; \
+ ;; \
+ "23.13.26032") \
+ mkdir /tmp/gpu_deps && cd /tmp/gpu_deps ; \
+ curl -L -O https://github.com/intel/compute-runtime/releases/download/23.05.25593.11/libigdgmm12_22.3.0_amd64.deb ; \
+ curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.13700.14/intel-igc-core_1.0.13700.14_amd64.deb ; \
+ curl -L -O https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.13700.14/intel-igc-opencl_1.0.13700.14_amd64.deb ; \
+ curl -L -O https://github.com/intel/compute-runtime/releases/download/23.13.26032.30/intel-opencl-icd_23.13.26032.30_amd64.deb ; \
+ curl -L -O https://github.com/intel/compute-runtime/releases/download/23.13.26032.30/libigdgmm12_22.3.0_amd64.deb ; \
+ dpkg -i *.deb && rm -Rf /tmp/gpu_deps ; \
+ ;; \
*) \
dpkg -P intel-gmmlib intel-igc-core intel-igc-opencl intel-level-zero-gpu intel-ocloc intel-opencl intel-opencl-icd && \
apt-get update && apt-get -y --no-install-recommends install dpkg-dev && rm -rf /var/lib/apt/lists/* && \
diff --git a/release_files/thirdparty-licenses/blingfire.LICENSE.txt b/release_files/thirdparty-licenses/blingfire.LICENSE.txt
new file mode 100644
index 0000000000..4b1ad51b2f
--- /dev/null
+++ b/release_files/thirdparty-licenses/blingfire.LICENSE.txt
@@ -0,0 +1,21 @@
+ MIT License
+ Copyright (c) Microsoft Corporation. All rights reserved.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
diff --git a/security.md b/security.md
new file mode 100644
index 0000000000..d85d4358b1
--- /dev/null
+++ b/security.md
@@ -0,0 +1,5 @@
+# Security Policy
+Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation.
+## Reporting a Vulnerability
+Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
diff --git a/src/BUILD b/src/BUILD
index 6adee8bb7d..b373b570bd 100644
--- a/src/BUILD
+++ b/src/BUILD
@@ -21,6 +21,23 @@
# ],
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@mediapipe//mediapipe/framework:more_selects.bzl", "more_selects")
+# To build without MediaPipe, pass both flags: bazel build --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 //src:ovms
+ name = "disable_mediapipe",
+ define_values = {
+ },
+ visibility = ["//visibility:public"],
+ name = "not_disable_mediapipe",
+ negate = ":disable_mediapipe",
constraint_setting(name = "linux_distribution_family")
constraint_value(constraint_setting = "linux_distribution_family", name = "fedora") # like RHEL/CentOS
constraint_value(constraint_setting = "linux_distribution_family", name = "debian") # like Ubuntu
@@ -43,16 +60,16 @@ cc_library(
load("@com_google_protobuf//:protobuf.bzl", "cc_proto_library")
+load("@tensorflow_serving//tensorflow_serving:serving.bzl", "serving_tensorflow_proto_dep")
name = "ovms_shared",
dynamic_deps = [],
static_deps = ["@tensorflow_serving//:__subpackages__",
- "@rapidjson//:__subpackages__",
- "@spdlog//:__subpackages__",
+ "@com_github_gabime_spdlog//:__subpackages__",
- "@cxxopts//:__subpackages__",
+ "@com_github_jarro2783_cxxopts//:__subpackages__",
@@ -74,17 +91,63 @@ cc_shared_library(
- "@openvino//:__subpackages__",
+ "@linux_openvino//:__subpackages__",
- ],
+ "@oneTBB//:__subpackages__",
+ "@com_github_glog_glog//:__subpackages__",
+ "@com_github_gflags_gflags//:__subpackages__",
+ ] + select({
+ "//conditions:default": [
+ "@mediapipe//:__subpackages__",
+ "@mediapipe_calculators//:__subpackages__",
+ "@model_api//:__subpackages__",
+ "@arm_neon_2_x86_sse//:__subpackages__",
+ "@ruy//:__subpackages__",
+ "@cpuinfo//:__subpackages__",
+ "@clog//:__subpackages__",
+ "@gemmlowp//:__subpackages__",
+ "@flatbuffers//:__subpackages__",
+ ],
+ "//src:disable_mediapipe" : [],
+ }),
features = [],
roots = ["//src:ovms_lib"],
+load("@mediapipe//mediapipe/framework/port:build_config.bzl", "mediapipe_cc_proto_library", "mediapipe_proto_library")
+ name = "ovmscalculatoroptions_proto", # ovmscalculatoroptions_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ srcs = ["mediapipe_calculators/ovmscalculator.proto"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@mediapipe//mediapipe/framework:calculator_options_proto",
+ "@mediapipe//mediapipe/framework:calculator_proto",
+ ],
+ name = "modelapiovmsinferencecalculator_proto", # modelapiovmsinferencecalculator_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ srcs = ["mediapipe_calculators/modelapiovmsinferencecalculator.proto"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@mediapipe//mediapipe/framework:calculator_options_proto",
+ "@mediapipe//mediapipe/framework:calculator_proto",
+ ],
+ name = "modelapiovmssessioncalculator_proto", # modelapiovmssessioncalculator_cc_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ srcs = ["mediapipe_calculators/modelapiovmssessioncalculator.proto"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@mediapipe//mediapipe/framework:calculator_options_proto",
+ "@mediapipe//mediapipe/framework:calculator_proto",
+ ],
name = "ovms_lib",
hdrs = ["ovms.h"],
@@ -135,6 +198,7 @@ cc_library(
+ "filesystem.cpp",
@@ -260,6 +324,8 @@ cc_library(
+ "servablemetadata.cpp",
+ "servablemetadata.hpp",
@@ -281,8 +347,6 @@ cc_library(
- "systeminfo_impl.cpp",
- "systeminfo_impl.hpp",
@@ -295,34 +359,39 @@ cc_library(
- "binaryutils.hpp",
- "binaryutils.cpp",
- ],
+ "tensor_conversion.hpp",
+ "tensor_conversion.cpp",
+ ] + select({
+ "//conditions:default": [
+ "mediapipe_internal/mediapipefactory.cpp",
+ "mediapipe_internal/mediapipefactory.hpp",
+ "mediapipe_internal/mediapipegraphconfig.hpp",
+ "mediapipe_internal/mediapipegraphconfig.cpp",
+ "mediapipe_internal/mediapipegraphdefinition.cpp",
+ "mediapipe_internal/mediapipegraphdefinition.hpp",
+ "mediapipe_internal/mediapipegraphexecutor.cpp",
+ "mediapipe_internal/mediapipegraphexecutor.hpp",
+ "mediapipe_calculators/modelapiovmsadapter.cc",
+ "mediapipe_calculators/modelapiovmsadapter.hpp",
+ "mediapipe_calculators/modelapiovmsadapterwrapper.cc",
+ "mediapipe_calculators/modelapiovmsadapterwrapper.hpp",
+ "mediapipe_calculators/ovms_calculator.cc",
+ "mediapipe_calculators/modelapiovmsinferencecalculator.cc",
+ "mediapipe_calculators/modelapiovmssessioncalculator.cc",
+ ],
+ "//src:disable_mediapipe" : [],
+ }),
deps = [
- "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto",
- "@tensorflow_serving//tensorflow_serving/apis:model_service_cc_proto",
- "@minitrace//:trace",
- "@com_github_grpc_grpc//:grpc++",
- "@org_tensorflow//tensorflow/core:framework",
- "@rapidjson//:rapidjson",
- "@spdlog//:spdlog",
- "@cxxopts//:cxxopts",
- "@awssdk//:s3",
- "@awssdk//:core",
- "@awssdk//:deps",
- "@azure//:storage",
- "@cpprest//:sdk",
- "@boost//:lib",
- "@com_github_googleapis_google_cloud_cpp//google/cloud/storage:storage_client",
- "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server",
- "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server_api",
- "@tensorflow_serving//tensorflow_serving/util:threadpool_executor",
- "@tensorflow_serving//tensorflow_serving/util:json_tensor",
- "@openvino//:openvino",
- "@opencv//:opencv",
- "@com_github_jupp0r_prometheus_cpp//core",
+ "//:ovms_dependencies",
- ],
+ ] + select({
+ "//conditions:default": [
+ "//src:ovmscalculatoroptions_cc_proto", # ovmscalculatoroptions_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ "//src:modelapiovmsinferencecalculator_cc_proto", # modelapiovmsinferencecalculator_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ "//src:modelapiovmssessioncalculator_cc_proto", # modelapiovmssessioncalculator_proto - just mediapipe stuff with mediapipe_proto_library adding nonvisible target
+ ],
+ "//src:disable_mediapipe" : [],
+ }),
local_defines = [
@@ -339,7 +408,8 @@ cc_library(
- ]
+ ],
+ alwayslink = 1,
@@ -350,7 +420,7 @@ cc_binary(
linkshared = 1,
deps = [
- "@rapidjson//:rapidjson",
+ "@com_github_tencent_rapidjson//:rapidjson",
@@ -426,7 +496,7 @@ cc_binary(
deps = [
- "@opencv//:opencv"
+ "@linux_opencv//:opencv"
linkshared = 1,
copts = [
@@ -445,7 +515,7 @@ cc_binary(
deps = [
- "@opencv//:opencv"
+ "@linux_opencv//:opencv"
linkshared = 1,
copts = [
@@ -490,7 +560,7 @@ cc_binary(
deps = [
- "@opencv//:opencv"
+ "@linux_opencv//:opencv"
linkshared = 1,
copts = [
@@ -509,7 +579,7 @@ cc_binary(
deps = [
- "@opencv//:opencv"
+ "@linux_opencv//:opencv"
linkshared = 1,
copts = [
@@ -528,7 +598,7 @@ cc_binary(
deps = [
- "@opencv//:opencv"
+ "@linux_opencv//:opencv"
linkshared = 1,
copts = [
@@ -577,8 +647,6 @@ cc_binary(
name = "capi_benchmark",
srcs = [
- "stringutils.cpp",
- "stringutils.hpp",
linkopts = [
@@ -608,6 +676,7 @@ cc_binary(
# "-lovms_shared", # Use for dynamic linking when neccessary
copts = [
+ "-Wall",
@@ -623,7 +692,8 @@ cc_test(
linkstatic = 1,
srcs = [
- "test/binaryutils_test.cpp",
+ "test/tensor_conversion_test.cpp",
+ "test/c_api_test_utils.hpp",
@@ -693,16 +763,65 @@ cc_test(
- ],
+ ] + select({
+ "//conditions:default": [
+ "test/mediapipe/inputsidepacketusertestcalc.cc",
+ "test/mediapipeflow_test.cpp",
+ "test/mediapipe_validation_test.cpp",
+ ],
+ "//src:disable_mediapipe" : [],
+ }),
data = [
+ "test/add_two_inputs_model/1/add.xml",
+ "test/add_two_inputs_model/1/add.bin",
+ "test/binaryutils/grayscale.jpg",
+ "test/binaryutils/rgb.jpg",
+ "test/binaryutils/rgb2x2.jpg",
+ "test/binaryutils/rgb4x4.jpg",
+ "test/c_api/config.json",
+ "test/c_api/config_benchmark.json",
+ "test/c_api/config_dummy_dag.json",
+ "test/c_api/config_dummy_dynamic_entry_dag.json",
+ "test/c_api/config_metadata_all.json",
+ "test/c_api/config_standard_dummy.json",
+ "test/configs/emptyConfigWithMetrics.json",
- "test/add_two_inputs_model/1/add.xml",
- "test/add_two_inputs_model/1/add.bin",
+ "test/dummy_saved_model/1/saved_model.pb",
+ "test/dummyUppercase/1/dummy.xml",
+ "test/dummyUppercase/1/dummy.bin",
+ "test/mediapipe/subconfig.json",
+ "test/mediapipe/config_mediapipe_add_adapter_full.json",
+ "test/mediapipe/config_mediapipe_dummy_adapter_full_subconfig.json",
+ "test/mediapipe/config_mediapipe_all_graphs_adapter_full.json",
+ "test/mediapipe/config_mediapipe_dummy_adapter_full_dag.json",
+ "test/mediapipe/config_mediapipe_dummy_adapter_full_dummy_in_both_config_and_subconfig.json",
+ "test/mediapipe/config_mediapipe_dummy_adapter_full.json",
+ "test/mediapipe/config_mediapipe_graph_with_side_packets.json",
+ "test/mediapipe/config_standard_add.json",
+ "test/mediapipe/config_standard_dummy.json",
+ "test/mediapipe/graphadd.pbtxt",
+ "test/mediapipe/graphaddadapterfull.pbtxt",
+ "test/mediapipe/graphdummy.pbtxt",
+ "test/mediapipe/graphdummyadapterfull.pbtxt",
+ "test/mediapipe/graphWithParams.pbtxt",
+ "test/mediapipe/relative_paths/config_relative_dummy_negative.json",
+ "test/mediapipe/relative_paths/config_relative_add_subconfig_negative.json",
+ "test/mediapipe/relative_paths/config_relative_add_subconfig.json",
+ "test/mediapipe/relative_paths/config_relative_dummy.json",
+ "test/mediapipe/relative_paths/graph1/dummy1/1/dummy.xml",
+ "test/mediapipe/relative_paths/graph2/dummy2/1/dummy.xml",
+ "test/mediapipe/relative_paths/graph1/graphaddadapterfull.pbtxt",
+ "test/mediapipe/relative_paths/graph1/subconfig.json",
+ "test/mediapipe/relative_paths/graph2/graphadd.pbtxt",
+ "test/mediapipe/graphdummyadapterfull_dummyinputnames.pbtxt",
+ "test/mediapipe/relative_paths/graph2/subconfig.json",
+ "test/passthrough/1/passthrough.xml",
+ "test/passthrough/1/passthrough.bin",
diff --git a/src/azurefilesystem.cpp b/src/azurefilesystem.cpp
index a16e4b4e10..d71ae9e938 100644
--- a/src/azurefilesystem.cpp
+++ b/src/azurefilesystem.cpp
@@ -24,9 +24,6 @@
namespace ovms {
-const std::string AzureFileSystem::AZURE_URL_FILE_PREFIX = "azfs://";
-const std::string AzureFileSystem::AZURE_URL_BLOB_PREFIX = "az://";
static as::cloud_storage_account createDefaultOrAnonymousAccount() {
try {
const char* env_cred = std::getenv("AZURE_STORAGE_CONNECTION_STRING");
diff --git a/src/azurefilesystem.hpp b/src/azurefilesystem.hpp
index ed7dd1096e..3b0d26370f 100644
--- a/src/azurefilesystem.hpp
+++ b/src/azurefilesystem.hpp
@@ -131,10 +131,6 @@ class AzureFileSystem : public FileSystem {
StatusCode deleteFileFolder(const std::string& path) override;
- static const std::string AZURE_URL_FILE_PREFIX;
- static const std::string AZURE_URL_BLOB_PREFIX;
diff --git a/src/azurestorage.cpp b/src/azurestorage.cpp
index 2652fb31f3..b6eecd760d 100644
--- a/src/azurestorage.cpp
+++ b/src/azurestorage.cpp
@@ -37,29 +37,6 @@ const std::string AzureStorageAdapter::extractAzureStorageExceptionMessage(const
-std::string AzureStorageAdapter::joinPath(std::initializer_list segments) {
- std::string joined;
- for (const auto& seg : segments) {
- if (joined.empty()) {
- joined = seg;
- } else if (isAbsolutePath(seg)) {
- if (joined[joined.size() - 1] == '/') {
- joined.append(seg.substr(1));
- } else {
- joined.append(seg);
- }
- } else {
- if (joined[joined.size() - 1] != '/') {
- joined.append("/");
- }
- joined.append(seg);
- }
- }
- return joined;
StatusCode AzureStorageAdapter::CreateLocalDir(const std::string& path) {
int status =
mkdir(const_cast(path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR);
@@ -470,8 +447,8 @@ StatusCode AzureStorageBlob::downloadFileFolderTo(const std::string& local_path)
for (auto&& d : dirs) {
- std::string remote_dir_path = joinPath({fullUri_, d});
- std::string local_dir_path = joinPath({local_path, d});
+ std::string remote_dir_path = FileSystem::joinPath({fullUri_, d});
+ std::string local_dir_path = FileSystem::joinPath({local_path, d});
SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing directory {} from {} -> {}", d, remote_dir_path,
@@ -498,8 +475,8 @@ StatusCode AzureStorageBlob::downloadFileFolderTo(const std::string& local_path)
for (auto&& f : files) {
- std::string remote_file_path = joinPath({fullUri_, f});
- std::string local_file_path = joinPath({local_path, f});
+ std::string remote_file_path = FileSystem::joinPath({fullUri_, f});
+ std::string local_file_path = FileSystem::joinPath({local_path, f});
SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing file {} from {} -> {}", f, remote_file_path,
@@ -555,9 +532,9 @@ StatusCode AzureStorageBlob::parseFilePath(const std::string& path) {
fullUri_ = path;
int share_start = 0;
// Blob path
- if (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) {
- share_start = path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) + AzureFileSystem::AZURE_URL_BLOB_PREFIX.size();
- } else if (path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) {
+ if (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) {
+ share_start = path.find(FileSystem::AZURE_URL_BLOB_PREFIX) + FileSystem::AZURE_URL_BLOB_PREFIX.size();
+ } else if (path.find(FileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) {
// File path
SPDLOG_LOGGER_ERROR(azurestorage_logger, "Wrong object type - az:// prefix in path required, azure:// found:", path);
return StatusCode::AS_INVALID_PATH;
@@ -1064,8 +1041,8 @@ StatusCode AzureStorageFile::downloadFileFolderTo(const std::string& local_path)
for (auto&& d : dirs) {
- std::string remote_dir_path = joinPath({fullUri_, d});
- std::string local_dir_path = joinPath({local_path, d});
+ std::string remote_dir_path = FileSystem::joinPath({fullUri_, d});
+ std::string local_dir_path = FileSystem::joinPath({local_path, d});
SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing directory {} from {} -> {}", d, remote_dir_path,
@@ -1092,8 +1069,8 @@ StatusCode AzureStorageFile::downloadFileFolderTo(const std::string& local_path)
for (auto&& f : files) {
- std::string remote_file_path = joinPath({fullUri_, f});
- std::string local_file_path = joinPath({local_path, f});
+ std::string remote_file_path = FileSystem::joinPath({fullUri_, f});
+ std::string local_file_path = FileSystem::joinPath({local_path, f});
SPDLOG_LOGGER_TRACE(azurestorage_logger, "Processing file {} from {} -> {}", f, remote_file_path,
@@ -1154,9 +1131,9 @@ StatusCode AzureStorageFile::parseFilePath(const std::string& path) {
fullUri_ = path;
int share_start = 0;
// File or directory path
- if (path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) {
- share_start = path.find(AzureFileSystem::AZURE_URL_FILE_PREFIX) + AzureFileSystem::AZURE_URL_FILE_PREFIX.size();
- } else if (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) {
+ if (path.find(FileSystem::AZURE_URL_FILE_PREFIX) != std::string::npos) {
+ share_start = path.find(FileSystem::AZURE_URL_FILE_PREFIX) + FileSystem::AZURE_URL_FILE_PREFIX.size();
+ } else if (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos) {
// Blob path
SPDLOG_LOGGER_ERROR(azurestorage_logger, "Wrong object type. azfs:// prefix in path required, found az://:", path);
return StatusCode::AS_INVALID_PATH;
@@ -1212,7 +1189,7 @@ std::shared_ptr AzureStorageFactory::getNewAzureStorageObje
bool AzureStorageFactory::isBlobStoragePath(std::string path) {
- return (path.find(AzureFileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos);
+ return (path.find(FileSystem::AZURE_URL_BLOB_PREFIX) != std::string::npos);
} // namespace ovms
diff --git a/src/azurestorage.hpp b/src/azurestorage.hpp
index afff5dd988..1cc0f27b93 100644
--- a/src/azurestorage.hpp
+++ b/src/azurestorage.hpp
@@ -58,7 +58,6 @@ class AzureStorageAdapter {
virtual StatusCode downloadFileFolderTo(const std::string& local_path) = 0;
virtual StatusCode checkPath(const std::string& path) = 0;
- std::string joinPath(std::initializer_list segments);
StatusCode CreateLocalDir(const std::string& path);
bool isAbsolutePath(const std::string& path);
std::vector FindSubdirectories(std::string path);
diff --git a/src/capi_frontend/capi.cpp b/src/capi_frontend/capi.cpp
index 03c2c38cf8..6bb70e3ec8 100644
--- a/src/capi_frontend/capi.cpp
+++ b/src/capi_frontend/capi.cpp
@@ -14,11 +14,14 @@
// limitations under the License.
#include "../buffer.hpp"
#include "../dags/pipeline.hpp"
+#include "../dags/pipelinedefinition.hpp"
+#include "../dags/pipelinedefinitionunloadguard.hpp"
#include "../execution_context.hpp"
#include "../inferenceparameter.hpp"
#include "../inferencerequest.hpp"
@@ -29,12 +32,15 @@
#include "../modelinstanceunloadguard.hpp"
#include "../modelmanager.hpp"
#include "../ovms.h" // NOLINT
+#include "../prediction_service.hpp"
#include "../profiler.hpp"
#include "../servablemanagermodule.hpp"
+#include "../servablemetadata.hpp"
#include "../server.hpp"
#include "../server_settings.hpp"
#include "../status.hpp"
#include "../timer.hpp"
+#include "capi_utils.hpp"
using ovms::Buffer;
using ovms::ExecutionContext;
@@ -44,6 +50,9 @@ using ovms::InferenceResponse;
using ovms::InferenceTensor;
using ovms::ModelInstanceUnloadGuard;
using ovms::ModelManager;
+using ovms::Pipeline;
+using ovms::PipelineDefinition;
+using ovms::PipelineDefinitionUnloadGuard;
using ovms::ServableManagerModule;
using ovms::Server;
using ovms::Status;
@@ -55,6 +64,16 @@ using std::chrono::microseconds;
extern "C" {
+OVMS_Status* OVMS_ApiVersion(uint32_t* major, uint32_t* minor) {
+ if (major == nullptr)
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "major version"));
+ if (minor == nullptr)
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "minor version"));
+ return nullptr;
void OVMS_StatusDelete(OVMS_Status* status) {
if (status == nullptr)
@@ -64,9 +83,9 @@ void OVMS_StatusDelete(OVMS_Status* status) {
OVMS_Status* OVMS_StatusGetCode(OVMS_Status* status,
uint32_t* code) {
if (status == nullptr)
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STATUS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "status"));
if (code == nullptr)
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "code"));
ovms::Status* sts = reinterpret_cast(status);
*code = static_cast(sts->getCode());
return nullptr;
@@ -75,9 +94,9 @@ OVMS_Status* OVMS_StatusGetCode(OVMS_Status* status,
OVMS_Status* OVMS_StatusGetDetails(OVMS_Status* status,
const char** details) {
if (status == nullptr)
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STATUS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "status"));
if (details == nullptr)
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "details"));
ovms::Status* sts = reinterpret_cast(status);
*details = sts->string().c_str();
return nullptr;
@@ -85,7 +104,7 @@ OVMS_Status* OVMS_StatusGetDetails(OVMS_Status* status,
OVMS_Status* OVMS_ServerSettingsNew(OVMS_ServerSettings** settings) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "settings"));
*settings = reinterpret_cast(new ovms::ServerSettingsImpl);
return nullptr;
@@ -99,7 +118,7 @@ void OVMS_ServerSettingsDelete(OVMS_ServerSettings* settings) {
OVMS_Status* OVMS_ModelsSettingsNew(OVMS_ModelsSettings** settings) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "model settings"));
*settings = reinterpret_cast(new ovms::ModelsSettingsImpl);
return nullptr;
@@ -114,7 +133,11 @@ void OVMS_ModelsSettingsDelete(OVMS_ModelsSettings* settings) {
OVMS_Status* OVMS_ServerNew(OVMS_Server** server) {
// Create new server once multi server configuration becomes possible.
if (server == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server"));
+ }
+ // Hack to force spdlog singleton to initialize before ovms::Server singleton
+ if (spdlog::get("notUsedLogger")) {
+ return reinterpret_cast(new Status(StatusCode::INTERNAL_ERROR, "unexpected error during spdlog configuration"));
*server = reinterpret_cast(&ovms::Server::instance());
return nullptr;
@@ -132,13 +155,13 @@ OVMS_Status* OVMS_ServerStartFromConfigurationFile(OVMS_Server* server,
OVMS_ServerSettings* server_settings,
OVMS_ModelsSettings* models_settings) {
if (server == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server"));
if (server_settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (models_settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "model settings"));
ovms::Server* srv = reinterpret_cast(server);
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(server_settings);
@@ -152,7 +175,7 @@ OVMS_Status* OVMS_ServerStartFromConfigurationFile(OVMS_Server* server,
OVMS_Status* OVMS_ServerSettingsSetGrpcPort(OVMS_ServerSettings* settings,
uint32_t grpcPort) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->grpcPort = grpcPort;
@@ -162,7 +185,7 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcPort(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetRestPort(OVMS_ServerSettings* settings,
uint32_t restPort) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->restPort = restPort;
@@ -172,7 +195,7 @@ OVMS_Status* OVMS_ServerSettingsSetRestPort(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetGrpcWorkers(OVMS_ServerSettings* settings,
uint32_t grpc_workers) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->grpcWorkers = grpc_workers;
@@ -182,10 +205,10 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcWorkers(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetGrpcBindAddress(OVMS_ServerSettings* settings,
const char* grpc_bind_address) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (grpc_bind_address == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "grpc bind address"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -195,7 +218,7 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcBindAddress(OVMS_ServerSettings* settings
OVMS_Status* OVMS_ServerSettingsSetRestWorkers(OVMS_ServerSettings* settings,
uint32_t rest_workers) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->restWorkers = rest_workers;
@@ -205,10 +228,10 @@ OVMS_Status* OVMS_ServerSettingsSetRestWorkers(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetRestBindAddress(OVMS_ServerSettings* settings,
const char* rest_bind_address) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (rest_bind_address == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "rest bind address"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -218,10 +241,10 @@ OVMS_Status* OVMS_ServerSettingsSetRestBindAddress(OVMS_ServerSettings* settings
OVMS_Status* OVMS_ServerSettingsSetGrpcChannelArguments(OVMS_ServerSettings* settings,
const char* grpc_channel_arguments) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (grpc_channel_arguments == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "grpc channel arguments"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -231,7 +254,7 @@ OVMS_Status* OVMS_ServerSettingsSetGrpcChannelArguments(OVMS_ServerSettings* set
OVMS_Status* OVMS_ServerSettingsSetFileSystemPollWaitSeconds(OVMS_ServerSettings* settings,
uint32_t seconds) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->filesystemPollWaitSeconds = seconds;
@@ -241,7 +264,7 @@ OVMS_Status* OVMS_ServerSettingsSetFileSystemPollWaitSeconds(OVMS_ServerSettings
OVMS_Status* OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(OVMS_ServerSettings* settings,
uint32_t minutes) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->sequenceCleanerPollWaitMinutes = minutes;
@@ -251,7 +274,7 @@ OVMS_Status* OVMS_ServerSettingsSetSequenceCleanerPollWaitMinutes(OVMS_ServerSet
OVMS_Status* OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(OVMS_ServerSettings* settings,
uint32_t seconds) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
serverSettings->resourcesCleanerPollWaitSeconds = seconds;
@@ -261,10 +284,10 @@ OVMS_Status* OVMS_ServerSettingsSetCustomNodeResourcesCleanerIntervalSeconds(OVM
OVMS_Status* OVMS_ServerSettingsSetCpuExtensionPath(OVMS_ServerSettings* settings,
const char* cpu_extension_path) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (cpu_extension_path == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "cpu extension path"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -274,10 +297,10 @@ OVMS_Status* OVMS_ServerSettingsSetCpuExtensionPath(OVMS_ServerSettings* setting
OVMS_Status* OVMS_ServerSettingsSetCacheDir(OVMS_ServerSettings* settings,
const char* cache_dir) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (cache_dir == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "cache dir"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -287,7 +310,7 @@ OVMS_Status* OVMS_ServerSettingsSetCacheDir(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetLogLevel(OVMS_ServerSettings* settings,
OVMS_LogLevel log_level) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
switch (log_level) {
@@ -315,10 +338,10 @@ OVMS_Status* OVMS_ServerSettingsSetLogLevel(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ServerSettingsSetLogPath(OVMS_ServerSettings* settings,
const char* log_path) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server settings"));
if (log_path == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "log path"));
ovms::ServerSettingsImpl* serverSettings = reinterpret_cast(settings);
@@ -328,25 +351,25 @@ OVMS_Status* OVMS_ServerSettingsSetLogPath(OVMS_ServerSettings* settings,
OVMS_Status* OVMS_ModelsSettingsSetConfigPath(OVMS_ModelsSettings* settings,
const char* config_path) {
if (settings == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SETTINGS));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "models settings"));
if (config_path == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "config path"));
ovms::ModelsSettingsImpl* modelsSettings = reinterpret_cast(settings);
return nullptr;
// inference API
-OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, uint32_t servableVersion) {
+OVMS_Status* OVMS_InferenceRequestNew(OVMS_InferenceRequest** request, OVMS_Server* server, const char* servableName, int64_t servableVersion) {
if (request == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (server == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_SERVER));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "server"));
if (servableName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "servable name"));
*request = reinterpret_cast(new InferenceRequest(servableName, servableVersion));
return nullptr;
@@ -358,51 +381,74 @@ void OVMS_InferenceRequestDelete(OVMS_InferenceRequest* request) {
delete reinterpret_cast(request);
-OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* req, const char* inputName, OVMS_DataType datatype, const uint64_t* shape, uint32_t dimCount) {
+OVMS_Status* OVMS_InferenceRequestAddInput(OVMS_InferenceRequest* req, const char* inputName, OVMS_DataType datatype, const int64_t* shape, size_t dimCount) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (inputName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name"));
if (shape == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TABLE));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "shape"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->addInput(inputName, datatype, shape, dimCount);
if (!status.ok()) {
return reinterpret_cast(new Status(status));
+    if (spdlog::default_logger_raw()->level() == spdlog::level::trace) {
+        std::stringstream ss;
+        ss << "C-API adding request input for servable: " << request->getServableName()
+           << " version: " << request->getServableVersion()
+           << " name: " << inputName
+           << " datatype: " << toString(ovms::getOVMSDataTypeAsPrecision(datatype))
+           << " shape: [";
+        // dimCount is size_t and may be 0: never evaluate dimCount - 1 (wraps) or read shape[0].
+        for (size_t i = 0; i < dimCount; ++i) {
+            ss << (i == 0 ? "" : ", ") << shape[i];
+        }
+        ss << "]";
+        SPDLOG_TRACE(ss.str());
+    }
return nullptr;
OVMS_Status* OVMS_InferenceRequestInputSetData(OVMS_InferenceRequest* req, const char* inputName, const void* data, size_t bufferSize, OVMS_BufferType bufferType, uint32_t deviceId) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (inputName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name"));
if (data == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->setInputBuffer(inputName, data, bufferSize, bufferType, deviceId);
if (!status.ok()) {
return reinterpret_cast(new Status(status));
+ if (spdlog::default_logger_raw()->level() == spdlog::level::trace) {
+ std::stringstream ss;
+ ss << "C-API setting request input data for servable: " << request->getServableName()
+ << " version: " << request->getServableVersion()
+ << " name: " << inputName
+ << " bufferType: " << bufferType
+ << " deviceId: " << deviceId;
+ SPDLOG_TRACE(ss.str());
+ }
return nullptr;
OVMS_Status* OVMS_InferenceRequestAddParameter(OVMS_InferenceRequest* req, const char* parameterName, OVMS_DataType datatype, const void* data, size_t byteSize) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (parameterName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "parameter name"));
if (data == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_DATA));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->addParameter(parameterName, datatype, data);
@@ -414,10 +460,10 @@ OVMS_Status* OVMS_InferenceRequestAddParameter(OVMS_InferenceRequest* req, const
OVMS_Status* OVMS_InferenceRequestRemoveParameter(OVMS_InferenceRequest* req, const char* parameterName) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (parameterName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "parameter name"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->removeParameter(parameterName);
@@ -429,10 +475,10 @@ OVMS_Status* OVMS_InferenceRequestRemoveParameter(OVMS_InferenceRequest* req, co
OVMS_Status* OVMS_InferenceRequestRemoveInput(OVMS_InferenceRequest* req, const char* inputName) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (inputName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->removeInput(inputName);
@@ -444,10 +490,10 @@ OVMS_Status* OVMS_InferenceRequestRemoveInput(OVMS_InferenceRequest* req, const
OVMS_Status* OVMS_InferenceRequestInputRemoveData(OVMS_InferenceRequest* req, const char* inputName) {
if (req == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_REQUEST));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference request"));
if (inputName == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "input name"));
InferenceRequest* request = reinterpret_cast(req);
auto status = request->removeInputBuffer(inputName);
@@ -457,33 +503,33 @@ OVMS_Status* OVMS_InferenceRequestInputRemoveData(OVMS_InferenceRequest* req, co
return nullptr;
-OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* res, uint32_t id, const char** name, OVMS_DataType* datatype, const uint64_t** shape, uint32_t* dimCount, const void** data, size_t* bytesize, OVMS_BufferType* bufferType, uint32_t* deviceId) {
+OVMS_Status* OVMS_InferenceResponseGetOutput(OVMS_InferenceResponse* res, uint32_t id, const char** name, OVMS_DataType* datatype, const int64_t** shape, size_t* dimCount, const void** data, size_t* bytesize, OVMS_BufferType* bufferType, uint32_t* deviceId) {
if (res == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_RESPONSE));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "inference response"));
if (name == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_STRING));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "output name"));
if (datatype == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_NUMBER));
+ return reinterpret_cast(new Status(StatusCode::NONEXISTENT_PTR, "data type"));
if (shape == nullptr) {
- return reinterpret_cast(new Status(StatusCode::NONEXISTENT_TABLE));
+ return reinterpret_cast