diff --git a/.bazelrc b/.bazelrc
index 84a512b46f..e8fe867ba7 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -46,6 +46,13 @@ build --spawn_strategy=standalone
build --genrule_strategy=standalone
build --define=grpc_no_ares=true
+build --define=MEDIAPIPE_DISABLE_GPU=1
+coverage --define=MEDIAPIPE_DISABLE_GPU=1
+test --define=MEDIAPIPE_DISABLE_GPU=1
+
+build --define=MEDIAPIPE_DISABLE=0
+coverage --define=MEDIAPIPE_DISABLE=0
+test --define=MEDIAPIPE_DISABLE=0
# Sets the default Apple platform to macOS.
build --apple_platform_type=macos
@@ -65,6 +72,10 @@ build --cxxopt=-fno-strict-overflow
build --cxxopt=-fno-delete-null-pointer-checks
build --cxxopt=-fwrapv
build --cxxopt=-fstack-protector
+build --cxxopt=-fstack-clash-protection
+build --cxxopt=-Wformat
+build --cxxopt=-Wformat-security
+build --cxxopt=-Werror=format-security
# Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF
# compilation options. It also addresses memory use due to
@@ -75,3 +86,5 @@ build --experimental_repo_remote_exec
build --force_pic
build --experimental_cc_shared_library
+build --check_visibility=true
+
diff --git a/.gitignore b/.gitignore
index 93a01ebb1b..aae49118fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ lib/
!client/python/ovmsclient/lib
cppclean_src
cppclean_test
+tags
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000000..53072b64f2
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+licenses(["notice"])
+
+exports_files([
+ "package.json",
+ "yarn.lock",
+])
+
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@mediapipe//mediapipe/framework:more_selects.bzl", "more_selects")
+
+config_setting(
+ name = "disable_mediapipe",
+ define_values = {
+ "MEDIAPIPE_DISABLE": "1",
+ },
+ visibility = ["//visibility:public"],
+)
+
+more_selects.config_setting_negation(
+ name = "not_disable_mediapipe",
+ negate = ":disable_mediapipe",
+)
+
+cc_library(
+ name = "ovms_dependencies",
+ deps = [
+ "@tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto",
+ "@tensorflow_serving//tensorflow_serving/apis:model_service_cc_proto",
+ "@minitrace//:trace",
+ "@com_github_grpc_grpc//:grpc++",
+ "@org_tensorflow//tensorflow/core:framework",
+ "@com_github_tencent_rapidjson//:rapidjson",
+ "@com_github_gabime_spdlog//:spdlog",
+ "@com_github_jarro2783_cxxopts//:cxxopts",
+ "@awssdk//:s3",
+ "@awssdk//:core",
+ "@awssdk//:deps",
+ "@azure//:storage",
+ "@cpprest//:sdk",
+ "@boost//:lib",
+ "@com_github_googleapis_google_cloud_cpp//google/cloud/storage:storage_client",
+ "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server",
+ "@tensorflow_serving//tensorflow_serving/util/net_http/server/public:http_server_api",
+ "@tensorflow_serving//tensorflow_serving/util:threadpool_executor",
+ "@tensorflow_serving//tensorflow_serving/util:json_tensor",
+ "@linux_openvino//:openvino",
+ "@linux_opencv//:opencv",
+ "@com_github_jupp0r_prometheus_cpp//core",
+ "@oneTBB//:tbb",
+ ] + select({
+ "//conditions:default": [
+ "@mediapipe//mediapipe/framework:calculator_framework",
+ "@mediapipe//mediapipe/framework/port:logging",
+ "@mediapipe//mediapipe/framework/port:parse_text_proto",
+ "@mediapipe//mediapipe/framework/port:status",
+ "@mediapipe_calculators//:mediapipe_calculators",
+ "@model_api//:adapter_api",
+ ],
+ "//:disable_mediapipe" : [],
+ }),
+ visibility = ["//visibility:public"],
+)
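
The select() above is driven by the MEDIAPIPE_DISABLE define added to .bazelrc: by default the MediaPipe framework, its calculators, and the model_api adapter are part of ovms_dependencies, while defining MEDIAPIPE_DISABLE=1 matches the :disable_mediapipe config_setting and drops them. A minimal sketch of how this is exercised from the command line, using the flag spelling that the Makefile changes later in this patch pass through debug_bazel_flags:

    # default: MediaPipe deps included (MEDIAPIPE_DISABLE=0 comes from .bazelrc)
    bazel build --jobs=$JOBS //:ovms_dependencies

    # opt out of MediaPipe: ":disable_mediapipe" matches and the select() returns an empty list
    bazel build --jobs=$JOBS --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 //:ovms_dependencies
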
diff --git a/Dockerfile.redhat b/Dockerfile.redhat
index 8d58ddea4a..c4d4c6e42f 100644
--- a/Dockerfile.redhat
+++ b/Dockerfile.redhat
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2020-2021 Intel Corporation
+# Copyright (c) 2020-2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,6 +22,9 @@ LABEL version="1.0.0"
SHELL ["/bin/bash", "-xo", "pipefail", "-c"]
ARG JOBS
+COPY entitlement /etc/pki/entitlement
+COPY rhsm-ca /etc/rhsm/ca
+RUN rm -f /etc/rhsm-host
RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && dnf clean all && yum update -d6 -y && yum install -d6 -y \
boost169-atomic \
@@ -127,19 +130,73 @@ RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.n
unzip \
vim \
xz \
- https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm && \
+ https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/libusb-devel-0.1.5-12.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusb-0.1.5-12.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/BaseOS/x86_64/os/Packages/libusbx-devel-1.0.23-4.el8.x86_64.rpm && \
yum clean all
+ARG NVIDIA=0
+# Add Nvidia dev tool if needed
+# hadolint ignore=DL3003
+RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
+ yum config-manager --save --set-enabled codeready-builder-for-rhel-8-x86_64-rpms && \
+ yum -y module disable python36 && \
+ yum install -y \
+ libzstd-devel \
+ libcudnn8-8.6.0.163-1.cuda11.8 \
+ libcudnn8-devel-8.6.0.163-1.cuda11.8 \
+ libcutensor1-1.6.1.5-1 \
+ libcutensor-devel-1.6.1.5-1 \
+ cuda-cudart-devel-11-8 && \
+ # ignore errors on hosts with older nvidia drivers
+ yum install -y cuda-11-8 || true && \
+ yum install -y python38-Cython && \
+ curl -L https://github.com/Kitware/ninja/releases/download/v1.10.0.gfb670.kitware.jobserver-1/ninja-1.10.0.gfb670.kitware.jobserver-1_x86_64-linux-gnu.tar.gz | tar xzv --strip-components=1 -C /usr/local/bin && \
+ curl https://github.com/mozilla/sccache/releases/download/v0.2.15/sccache-v0.2.15-x86_64-unknown-linux-musl.tar.gz -L | tar xvzC /usr/local/bin --strip-components=1 --wildcards '*/sccache' && \
+ chmod a+x /usr/local/bin/sccache && \
+ curl https://github.com/Kitware/CMake/releases/download/v3.24.0/cmake-3.24.0-linux-x86_64.tar.gz -L | tar xzvC /usr/local --exclude={doc,man} --strip-components=1 && \
+ curl -L https://github.com/ccache/ccache/releases/download/v4.3/ccache-4.3.tar.xz | tar xJv && \
+ mkdir -p ccache-4.3/build && cd ccache-4.3/build && \
+ cmake -DCMAKE_BUILD_TYPE=Release -G Ninja .. && \
+ ninja -v install && \
+ rm -rf /var/cache/yum
+
# build_type=[ opt, dbg ]
ARG build_type=dbg
-ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
ARG minitrace_flags
ENV TF_SYSTEM_LIBS="curl"
ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log"
-
+ARG ov_source_branch=master
+ARG ov_contrib_branch=master
+ARG sentencepiece=0
+ARG ov_source_org=openvinotoolkit
+ARG ov_contrib_org=openvinotoolkit
ARG ov_use_binary=1
ARG DLDT_PACKAGE_URL
ARG TEMP_DIR=/tmp/openvino_installer
+ARG CMAKE_BUILD_TYPE=Release
+
+# hadolint ignore=DL3003
+RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive
+
+################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################
+# Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1
+# hadolint ignore=DL3003
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
+WORKDIR /openvino/build
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; dnf install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/opencl-headers-2.2-1.20180306gite986688.el8.noarch.rpm && dnf clean all
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " ..
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make install
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
+ mkdir -p /opt/intel/openvino/extras && \
+ mkdir -p /opt/intel/openvino && \
+ ln -s /openvino/inference-engine/temp/opencv_*/opencv /opt/intel/openvino/extras && \
+ ln -s /usr/local/runtime /opt/intel/openvino && \
+ ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
+ ln -s /opt/intel/openvino /opt/intel/openvino_2023
+################## END OF OPENVINO SOURCE BUILD ######################
################### TAKE OPENVINO FROM A BINARY RELEASE - buildarg ov_use_binary=1 (DEFAULT) ##########
WORKDIR $TEMP_DIR
@@ -149,7 +206,40 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
- ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022
+ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023
+
+# install sample apps including benchmark_app
+RUN yum install -y http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-devel-2.2.2-1.el8.x86_64.rpm \
+ http://mirror.centos.org/centos/8-stream/PowerTools/x86_64/os/Packages/gflags-2.2.2-1.el8.x86_64.rpm \
+ https://download-ib01.fedoraproject.org/pub/epel/8/Everything/x86_64/Packages/j/json-devel-3.6.1-2.el8.x86_64.rpm && \
+ rm -rf /var/cache/yum
+RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi
+#################### END OF OPENVINO BINARY INSTALL
+
+# SENTENCEPIECE_EXTENSION
+ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake
+WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions
+RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . --parallel $JOBS
+
+# NVIDIA
+ENV OPENVINO_BUILD_PATH=/cuda_plugin_build
+ENV OPENVINO_HOME=/openvino
+ENV OPENVINO_CONTRIB=/openvino_contrib
+
+# hadolint ignore=DL3003
+RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
+ mkdir "${OPENVINO_BUILD_PATH}" && \
+ cd "${OPENVINO_BUILD_PATH}" && \
+ cmake "${OPENVINO_HOME}" \
+ -DENABLE_NVIDIA=ON \
+ -DENABLE_TESTS=ON \
+ -DBUILD_arm_plugin=OFF \
+ -DBUILD_java_api=OFF \
+ -DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB}"/modules \
+ -DWHEEL_VERSION=2022.1.0 \
+ -DVERBOSE_BUILD=ON \
+ -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" && \
+ cmake --build "${OPENVINO_BUILD_PATH}" --target openvino_nvidia_gpu_plugin -j "$JOBS"
# Build OpenVINO Model Server
WORKDIR /ovms
@@ -174,6 +264,14 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT
RUN yum install -y https://github.com/linux-test-project/lcov/releases/download/v1.16/lcov-1.16-1.noarch.rpm && yum clean all
WORKDIR /ovms
+
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
+
+ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
+RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \
+ sed -i -e 's|3.19.1|3.9.2|g' WORKSPACE && \
+ sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE
+
# hadolint ignore=DL3059
RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework
@@ -181,7 +279,13 @@ RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/co
# hadolint ignore=SC2046
RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch
-RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto
+# Mediapipe
+COPY BUILD.bazel /ovms/
+COPY yarn.lock /ovms/
+COPY package.json /ovms/
+
+# prebuild dependencies before copying sources
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies
# hadolint ignore=DL3059
RUN cp -v /etc/ssl/certs/ca-bundle.crt /etc/ssl/certs/ca-certificates.crt
@@ -199,7 +303,7 @@ RUN make
WORKDIR /ovms
-ARG PROJECT_VERSION="2022.3"
+ARG PROJECT_VERSION="2023.0"
ARG PROJECT_NAME="OpenVINO Model Server"
LABEL description=${PROJECT_NAME}
@@ -207,43 +311,44 @@ LABEL description=${PROJECT_NAME}
RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/version.hpp"
RUN if [ "$build_type" = "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ;
RUN if [ "$ov_use_binary" = "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp
-
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp
# Test Coverage
-COPY check_coverage.bat /ovms/
+COPY ci/check_coverage.bat /ovms/
ARG CHECK_COVERAGE=0
+
ARG RUN_TESTS=1
-RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
- bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
+RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" = "1" ] ; then bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
+ bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
# C api shared library
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:ovms_shared
# C api app with bazel
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example
# C-API benchmark app
-RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
# OVMS
-RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so
# C-api C/C++ app with gcc
-COPY MakefileCapi .
-RUN make -f MakefileCapi cpp && make -f MakefileCapi c
+COPY MakefileCapi /ovms/
+RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \
+ make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
ARG ovms_metadata_file
COPY ${ovms_metadata_file} metadata.json
-RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ;
RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms
COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/
COPY release_files/LICENSE /ovms/release_files/LICENSE
COPY client /client
COPY demos /demos
+RUN rm -Rf /etc/entitlement /etc/rhsm/ca
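
The Red Hat build stage now consumes several build args (NVIDIA, sentencepiece, ov_use_binary, ov_source_org/ov_contrib_org, debug_bazel_flags) and expects entitlement/ and rhsm-ca/ directories in the build context. These are normally supplied by make docker_build; a hand-written invocation would look roughly like the sketch below, with the argument values and image tag chosen for illustration only:

    # illustrative only; `make docker_build BASE_OS=redhat` is the supported entry point
    mkdir -p entitlement rhsm-ca    # filled from /etc/pki/entitlement and /etc/rhsm/ca when NVIDIA=1
    docker build -f Dockerfile.redhat . \
        --build-arg JOBS=$(nproc) \
        --build-arg ov_use_binary=1 --build-arg sentencepiece=1 --build-arg NVIDIA=0 \
        --build-arg ovms_metadata_file=.workspace/metadata.json \
        -t openvino/model_server-build:latest
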
diff --git a/Dockerfile.ubuntu b/Dockerfile.ubuntu
index 2629dd2f5c..7ad5c0238f 100644
--- a/Dockerfile.ubuntu
+++ b/Dockerfile.ubuntu
@@ -24,14 +24,10 @@ SHELL ["/bin/bash", "-xo", "pipefail", "-c"]
ARG JOBS
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install --no-install-recommends -y \
- libboost-atomic1.71.0 \
- libboost-chrono1.71.0 \
- libboost-filesystem1.71.0 \
- libboost-program-options1.71.0 \
- libboost-thread1.71.0 \
- libboost-system1.71.0 \
- libboost-date-time1.71.0 \
build-essential \
+ gcc-9 \
+ g++-9 \
+ make \
cmake \
automake \
autoconf \
@@ -45,6 +41,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
pkg-config \
wget \
zlib1g-dev && \
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9 && \
+ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@@ -52,7 +50,9 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
WORKDIR /boost
# hadolint ignore=DL3003
RUN wget -nv https://sourceforge.net/projects/boost/files/boost/1.69.0/boost_1_69_0.tar.gz && \
-tar xvf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \
+tar xf boost_1_69_0.tar.gz && cd boost_1_69_0 && ./bootstrap.sh && \
+sed -i -e 's|#if PTHREAD_STACK_MIN > 0|#ifdef PTHREAD_STACK_MIN|g' boost/thread/pthread/thread_data.hpp && \
+# fix for compiler >=9.5 https://github.com/boostorg/thread/pull/297/files
./b2 -j ${JOBS} cxxstd=17 link=static cxxflags='-fPIC' cflags='-fPIC' \
--with-chrono --with-date_time --with-filesystem --with-program_options --with-system \
--with-random --with-thread --with-atomic --with-regex \
@@ -73,13 +73,14 @@ WORKDIR /azure/cpprestsdk/Release/build.release
RUN cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DWERROR=OFF -DBUILD_SAMPLES=OFF -DBUILD_TESTS=OFF && make --jobs=$JOBS install
WORKDIR /azure/azure-storage-cpp/Microsoft.WindowsAzure.Storage/build.release
-RUN CASABLANCA_DIR=/azure/cpprestsdk cmake .. -DCMAKE_CXX_FLAGS="-fPIC" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DCMAKE_VERBOSE_MAKEFILE=ON && make --jobs=$JOBS && make --jobs=$JOBS install
+RUN CASABLANCA_DIR=/azure/cpprestsdk cmake .. -DCMAKE_CXX_FLAGS="-fPIC -Wno-error=deprecated-declarations" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBoost_USE_STATIC_RUNTIME=ON -DBoost_USE_STATIC_LIBS=ON -DCMAKE_VERBOSE_MAKEFILE=ON && make --jobs=$JOBS && make --jobs=$JOBS install
+# no-error flag related to https://github.com/aws/aws-sdk-cpp/issues/1582
####### End of Azure SDK
# Build AWS S3 SDK
RUN git clone https://github.com/aws/aws-sdk-cpp.git --branch 1.7.129 --single-branch --depth 1 /awssdk
WORKDIR /awssdk/build
-RUN cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY=s3 -DENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DMINIMIZE_SIZE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFORCE_SHARED_CRT=OFF -DSIMPLE_INSTALL=OFF -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 " .. && make --jobs=$JOBS
+RUN cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_ONLY=s3 -DENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DMINIMIZE_SIZE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DFORCE_SHARED_CRT=OFF -DSIMPLE_INSTALL=OFF -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized" .. && make --jobs=$JOBS
####### End of AWS S3 SDK
@@ -109,23 +110,24 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
nlohmann-json3-dev \
python2 \
python2-dev \
- python-setuptools \
- python3 \
- python3-pip \
- python3-dev \
- python-is-python3 \
- python3-setuptools \
- python3-virtualenv \
- python3-numpy \
- python-is-python3 \
unzip \
vim \
xz-utils && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y software-properties-common gpg gpg-agent --no-install-recommends && \
+ add-apt-repository ppa:deadsnakes/ppa && \
+ apt-get install -y python3.8 python3.8-dev python3.8-distutils python3-pip --no-install-recommends && \
+ apt-get clean && rm -rf /var/lib/apt/lists/* && \
+ update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 && \
+ python3 -m pip install numpy==1.21.0 --no-cache-dir
+
ARG JOBS
ARG ov_use_binary=1
+ARG sentencepiece=0
+ARG ov_source_org=openvinotoolkit
+ARG ov_contrib_org=openvinotoolkit
ARG DLDT_PACKAGE_URL
ARG ov_source_branch=master
ARG ov_contrib_branch=master
@@ -134,17 +136,18 @@ ARG CMAKE_BUILD_TYPE=Release
# build_type=[ opt, dbg ]
ARG build_type=opt
-ARG debug_bazel_flags=--strip=never\ --copt="-g"\ -c\ dbg
ARG minitrace_flags
ENV HDDL_INSTALL_DIR=/opt/intel/openvino/deployment_tools/inference_engine/external/hddl
ENV TF_SYSTEM_LIBS="curl"
ENV TEST_LOG="/root/.cache/bazel/_bazel_root/bc57d4817a53cab8c785464da57d1983/execroot/ovms/bazel-out/test.log"
+ARG NVIDIA=0
+# hadolint ignore=DL3003
+RUN if [[ "$sentencepiece" == "1" || "$NVIDIA" == "1" ]] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_contrib_org/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive
################### BUILD OPENVINO FROM SOURCE - buildarg ov_use_binary=0 ############################
# Build OpenVINO and nGraph (OV dependency) with D_GLIBCXX_USE_CXX11_ABI=0 or 1
-
# hadolint ignore=DL3003
-RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/openvinotoolkit/openvino /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
+RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; git clone https://github.com/$ov_source_org/openvino.git /openvino && cd /openvino && git checkout $ov_source_branch && git submodule update --init --recursive
WORKDIR /openvino/build
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cmake -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DENABLE_SAMPLES=0 -DNGRAPH_USE_CXX_ABI=1 -DCMAKE_CXX_FLAGS=" -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=parentheses " ..
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; make --jobs=$JOBS
@@ -155,7 +158,7 @@ RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; \
ln -s /openvino/inference-engine/temp/opencv_*_ubuntu20/opencv /opt/intel/openvino/extras && \
ln -s /usr/local/runtime /opt/intel/openvino && \
ln -s /openvino/scripts/setupvars/setupvars.sh /opt/intel/openvino/setupvars.sh && \
- ln -s /opt/intel/openvino /opt/intel/openvino_2022
+ ln -s /opt/intel/openvino /opt/intel/openvino_2023
################## END OF OPENVINO SOURCE BUILD ######################
ARG TEMP_DIR=/tmp/openvino_installer
@@ -168,7 +171,7 @@ WORKDIR $TEMP_DIR
# chmod 755 l_openvino_toolkit_* && \
# ./l_openvino_toolkit_* -a -s --eula accept && \
# rm -rf /opt/intel/openvino && \
-# ln -s /opt/intel/openvino_2022 /opt/intel/openvino
+# ln -s /opt/intel/openvino_2023 /opt/intel/openvino
# OV toolkit package
RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ; else exit 0 ; fi ; \
@@ -176,7 +179,7 @@ RUN if [ "$ov_use_binary" == "1" ] && [ "$DLDT_PACKAGE_URL" != "" ]; then true ;
mkdir /opt/intel && \
tar -zxf l_openvino_toolkit*.tgz -C /opt/intel && \
ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino && \
- ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2022
+ ln -s /opt/intel/l_openvino_toolkit* /opt/intel/openvino_2023
WORKDIR /
# apt package
@@ -188,18 +191,27 @@ RUN if [ "$ov_use_binary" = "1" ] && [ "$DLDT_PACKAGE_URL" = "" ] ; then true ;
apt-get update && \
apt-get install --no-install-recommends -y $APT_OV_PACKAGE && \
rm -rf /var/lib/apt/lists/* && \
- ln -s /opt/intel/openvino_2022 /opt/intel/openvino
+ ln -s /opt/intel/openvino_2023 /opt/intel/openvino
+
+RUN wget -nv https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/oneapi-tbb-2021.9.0-lin.tgz && \
+ tar -xzf oneapi-tbb-2021.9.0-lin.tgz && \
+ cp oneapi-tbb-2021.9.0/lib/intel64/gcc4.8/libtbb.so* /opt/intel/openvino/runtime/lib/intel64/
# install sample apps including benchmark_app
RUN if [ -f /opt/intel/openvino/samples/cpp/build_samples.sh ]; then /opt/intel/openvino/samples/cpp/build_samples.sh ; fi
#################### END OF OPENVINO BINARY INSTALL
+# SENTENCEPIECE_EXTENSION
+ENV OpenVINO_DIR=/opt/intel/openvino/runtime/cmake
+
+WORKDIR /openvino_contrib/modules/custom_operations/user_ie_extensions
+RUN if [ "$sentencepiece" == "1" ] ; then true ; else exit 0 ; fi ; cmake .. -DCMAKE_BUILD_TYPE=Release -DCUSTOM_OPERATIONS="sentence_piece" && cmake --build . --parallel $JOBS
+
# NVIDIA
ENV OPENVINO_BUILD_PATH=/cuda_plugin_build
ENV OPENVINO_HOME=/openvino
ENV OPENVINO_CONTRIB=/openvino_contrib
-ARG NVIDIA=0
# Add Nvidia dev tool if needed
# hadolint ignore=DL3003
RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
@@ -223,7 +235,6 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
# hadolint ignore=DL3003
RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
- git clone https://github.com/openvinotoolkit/openvino_contrib.git /openvino_contrib && cd /openvino_contrib && git checkout $ov_contrib_branch && git submodule update --init --recursive && \
mkdir ${OPENVINO_BUILD_PATH} && \
cd "${OPENVINO_BUILD_PATH}" && \
cmake "${OPENVINO_HOME}" \
@@ -231,6 +242,10 @@ RUN if [ "$NVIDIA" == "1" ] ; then true ; else exit 0 ; fi ; \
-DENABLE_TESTS=ON \
-DBUILD_arm_plugin=OFF \
-DBUILD_java_api=OFF \
+ -DBUILD_custom_operations=OFF \
+ -DBUILD_mo_pytorch=OFF \
+ -DBUILD_optimum=OFF \
+ -DBUILD_ovms_ai_extension=OFF \
-DOPENVINO_EXTRA_MODULES="${OPENVINO_CONTRIB}/modules" \
-DWHEEL_VERSION=2022.1.0 \
-DVERBOSE_BUILD=ON \
@@ -250,7 +265,13 @@ RUN curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHT
WORKDIR /ovms
COPY .bazelrc WORKSPACE /ovms/
COPY external /ovms/external/
-COPY MakefileCapi /ovms/
+
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/
+
+ARG debug_bazel_flags=--strip=never\ --copt="-g "\ -c\ dbg
+RUN if [[ $debug_bazel_flags == *"MEDIAPIPE_DISABLE=1"* ]]; then true ; else exit 0 ; fi ; \
+ sed -i -e 's|3.19.1|3.9.2|g' WORKSPACE && \
+ sed -i -e 's|87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422|1fbf1c2962af287607232b2eddeaec9b4f4a7a6f5934e1a9276e9af76952f7e0|g' WORKSPACE
RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @org_tensorflow//tensorflow/core:framework
@@ -262,7 +283,13 @@ COPY third_party /ovms/third_party/
# hadolint ignore=SC2046
RUN patch -d $(bazel info output_base)/external/build_bazel_rules_apple/ -p1 < /ovms/third_party/build_bazel_rules_apple/bazel_rules_apple.patch
-RUN bazel build --jobs=$JOBS ${debug_bazel_flags} @tensorflow_serving//tensorflow_serving/apis:prediction_service_cc_proto
+# Mediapipe
+COPY BUILD.bazel /ovms/
+COPY yarn.lock /ovms/
+COPY package.json /ovms/
+
+# prebuild dependencies before copying sources
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //:ovms_dependencies
# Copy example clients into build image for static analysis
WORKDIR /example_cpp_client
@@ -286,38 +313,40 @@ RUN bash -c "sed -i -e 's|REPLACE_PROJECT_NAME|${PROJECT_NAME}|g' /ovms/src/vers
RUN if [ "$build_type" == "dbg" ] ; then bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}-debug|g' /ovms/src/version.hpp" ; else bash -c "sed -i -e 's|REPLACE_PROJECT_VERSION|${PROJECT_VERSION}|g' /ovms/src/version.hpp" ; fi ;
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(find /opt/intel/ -maxdepth 1 -mindepth 1 -type d | grep openvino | grep -Eo '[0-9]{4}.[0-9].[0-9].[0-9]+.[^_]+')#g" /ovms/src/version.hpp
RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; sed -i -e "s#REPLACE_OPENVINO_NAME#$(git --git-dir /openvino/.git log -n 1 | head -n 1 | cut -d' ' -f2 | head -c 12)#g" /ovms/src/version.hpp
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/openvino/runtime/lib/intel64/:/opt/opencv/lib/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
# Test Coverage
-COPY check_coverage.bat /ovms/
+COPY ci/check_coverage.bat /ovms/
ARG CHECK_COVERAGE=0
+
ARG RUN_TESTS=1
-RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then bazel coverage --combined_report=lcov --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
- bazel test ${debug_bazel_flags} --jobs=$JOBS --test_summary=detailed --test_output=streamed //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
+RUN if [ "$RUN_TESTS" == "1" ] ; then if [ "$CHECK_COVERAGE" == "1" ] ; then \
+ bazel coverage --combined_report=lcov --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || { cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; } && genhtml --output genhtml "$(bazel info output_path)/_coverage/_coverage_report.dat" ; fi ; \
+ bazel test --jobs=$JOBS ${debug_bazel_flags} --test_timeout=1800 --test_summary=detailed --test_output=streamed --test_filter=-*Stress*Mediapipe* //src:ovms_test > ${TEST_LOG} 2>&1 || (cat ${TEST_LOG} && rm -rf ${TEST_LOG} && exit 1 ; ) && tail -n 100 ${TEST_LOG} && rm -rf ${TEST_LOG} ; fi ;
# C api shared library
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:ovms_shared
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:ovms_shared
# C api app with bazel
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs $JOBS //src:capi_cpp_example
+RUN bazel build --jobs $JOBS ${debug_bazel_flags} //src:capi_cpp_example
# C-API benchmark app
-RUN bazel build //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:capi_benchmark && ./bazel-bin/src/capi_benchmark --niter 2 --threads_per_ireq 2 --nireq 1 --servable_name "dummy" --inputs_names "b" --shape "b[1,10]"
# OVMS
-RUN bazel build ${debug_bazel_flags} ${minitrace_flags} --jobs=$JOBS //src:ovms
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} ${minitrace_flags} //src:ovms
# hadolint ignore=DL3059
-RUN bazel build ${debug_bazel_flags} --jobs=$JOBS //src:libsampleloader.so
+RUN bazel build --jobs=$JOBS ${debug_bazel_flags} //src:libsampleloader.so
# C-api C/C++ app with gcc
-RUN make -f MakefileCapi cpp && make -f MakefileCapi c
+COPY MakefileCapi /ovms/
+RUN make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}" && \
+ make -f MakefileCapi c BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
ARG ovms_metadata_file
COPY ${ovms_metadata_file} metadata.json
-RUN if [ "$build_type" == "dbg" ] ; then bash -c "cp /ovms/bazel-out/k8-dbg/bin/src/ovms /ovms/bazel-bin/src/ovms" ; else exit 0; fi ;
RUN /ovms/bazel-bin/src/ovms --version && /ovms/bazel-bin/src/ovms
COPY release_files/thirdparty-licenses/ /ovms/release_files/thirdparty-licenses/
diff --git a/DockerfileMakePackage b/DockerfileMakePackage
index 37164dd535..351ad5c66e 100644
--- a/DockerfileMakePackage
+++ b/DockerfileMakePackage
@@ -21,6 +21,7 @@ FROM $BUILD_IMAGE
ARG BASE_OS=ubuntu
ARG ov_use_binary=1
ARG NVIDIA=0
+ARG sentencepiece=0
SHELL ["/bin/bash", "-c"]
RUN mkdir /patchelf && cd /patchelf && \
@@ -28,28 +29,30 @@ RUN mkdir /patchelf && cd /patchelf && \
tar -xf 0.10.tar.gz && ls -lah && cd */ && \
./bootstrap.sh && ./configure && make && make install
-RUN mkdir -vp /ovms_release/bin
-RUN mkdir -vp /ovms_release/deps
-RUN mkdir -vp /ovms_release/lib
-RUN mkdir -vp /ovms_release/lib/hddl/config
-RUN mkdir -vp /ovms_release/lib/custom_nodes
+RUN mkdir -vp /ovms_release/bin && \
+ mkdir -vp /ovms_release/deps && \
+ mkdir -vp /ovms_release/lib && \
+ mkdir -vp /ovms_release/lib/hddl/config && \
+ mkdir -vp /ovms_release/lib/custom_nodes
RUN if [ -d /ovms/src/custom_nodes/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/
+RUN if [ -d /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS} ] ; then true ; else exit 0 ; fi ; cp /ovms/src/custom_nodes/tokenizer/lib/${BASE_OS}/*.so /ovms_release/lib/custom_nodes/
RUN cp /ovms/metadata.json /ovms_release/
+RUN if [ "$sentencepiece" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino_contrib/modules/custom_operations/user_ie_extensions/user_ie_extensions/libuser_ov_extensions.so /ovms_release/lib/
RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/libopenvino_nvidia_gpu_plugin.so /ovms_release/lib/
-RUN if [ "$ov_use_binary" == "0" ] ; then true ; else exit 0 ; fi ; cp -v /openvino/bin/intel64/Release/plugins.xml /ovms_release/lib/
+RUN if [ "$NVIDIA" == "1" ]; then true ; else exit 0 ; fi ; echo '' > /ovms_release/lib/plugins.xml
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/3rdparty/hddl/config/* /ovms_release/lib/hddl/config/ || true
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -vr /opt/intel/openvino/runtime/3rdparty/hddl/etc/* /ovms_release/lib/hddl/etc/ || true
-RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp -v /opt/intel/openvino/runtime/lib/intel64/plugins.xml /ovms_release/lib/ && cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/
+RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; cp /opt/intel/openvino/install_dependencies/* /ovms_release/deps/
RUN if [ "$ov_use_binary" == "1" ] ; then true ; else exit 0 ; fi ; rm -vrf /ovms_release/deps/*-devel-*
RUN find /ovms/bazel-out/k8-*/bin -iname '*.so*' -exec cp -v {} /ovms_release/lib/ \;
RUN cd /ovms_release/lib/ ; rm -f libazurestorage.so.* ; ln -s libazurestorage.so libazurestorage.so.7 ;ln -s libazurestorage.so libazurestorage.so.7.5
RUN cd /ovms_release/lib/ ; rm -f libcpprest.so.2.10 ; ln -s libcpprest.so libcpprest.so.2.10
-RUN rm -f /ovms_release/lib/libssl.so
-RUN rm -f /ovms_release/lib/libsampleloader*
-RUN rm -f /ovms_release/lib/lib_node*
-RUN rm -f /ovms_release/lib/libcustom_node*
+RUN rm -f /ovms_release/lib/libssl.so && \
+ rm -f /ovms_release/lib/libsampleloader* && \
+ rm -f /ovms_release/lib/lib_node* && \
+ rm -f /ovms_release/lib/libcustom_node*
# Remove coverage libraries
RUN if [ -f /ovms_release/lib/libjava.so ] ; then true ; else exit 0 ; fi ;cd /ovms_release/lib/ &&\
@@ -78,21 +81,24 @@ RUN if [ "$BASE_OS" == "redhat" ] ; then true ; else exit 0 ; fi ; cp /usr/lib64
RUN find /ovms/bazel-bin/src -name 'ovms' -type f -exec cp -v {} /ovms_release/bin \;
WORKDIR /ovms_release/bin
RUN patchelf --remove-rpath ./ovms && patchelf --set-rpath '$ORIGIN/../lib/' ./ovms
-RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \;
-RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \;
+RUN find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --remove-rpath {} \; && \
+ find /ovms_release/lib/ -iname '*.so*' -exec patchelf --debug --set-rpath '$ORIGIN/../lib' {} \;
WORKDIR /ovms
-RUN cp -v /ovms/release_files/LICENSE /ovms_release/
-RUN cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/
-RUN mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include
+RUN cp -v /ovms/release_files/LICENSE /ovms_release/ && \
+ cp -rv /ovms/release_files/thirdparty-licenses /ovms_release/ && \
+ mkdir -vp /ovms_release/include && cp /ovms/src/ovms.h /ovms_release/include && \
+ ls -lahR /ovms_release/
-RUN ls -lahR /ovms_release/
-
-RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \;
-RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \;
+RUN find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --remove-rpath {} \; && \
+ find /ovms_release/lib/ -iname '*.so*' -type f -exec patchelf --set-rpath '$ORIGIN/../lib' {} \;
WORKDIR /
-RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json
-RUN tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json
+RUN tar czf ovms.tar.gz --transform 's/ovms_release/ovms/' /ovms_release/ && \
+ sha256sum ovms.tar.gz > ovms.tar.gz.sha256 && \
+ cp /ovms_release/metadata.json /ovms.tar.gz.metadata.json && \
+ tar cJf ovms.tar.xz --transform 's/ovms_release/ovms/' /ovms_release/ && \
+ sha256sum ovms.tar.xz > ovms.tar.xz.sha256 && \
+ cp /ovms_release/metadata.json /ovms.tar.xz.metadata.json
ENTRYPOINT [ ]
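
Beyond consolidating RUN layers, the packaging stage now also copies the tokenizer custom nodes and, when sentencepiece=1, libuser_ov_extensions.so from openvino_contrib, before emitting ovms.tar.gz/ovms.tar.xz together with their sha256 sums and metadata. A rough sketch of consuming the archive on a host, with the layout implied by the --transform rule above:

    sha256sum -c ovms.tar.gz.sha256    # optional integrity check against the generated sum
    tar xzf ovms.tar.gz                # archive root is ovms/ (renamed from /ovms_release)
    ./ovms/bin/ovms --version          # binary rpath is set to $ORIGIN/../lib by patchelf
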
diff --git a/Makefile b/Makefile
index 829e1a09b0..3ac306c64b 100644
--- a/Makefile
+++ b/Makefile
@@ -44,12 +44,18 @@ CHECK_COVERAGE ?=0
RUN_TESTS ?= 1
NVIDIA ?=0
BUILD_NGINX ?= 0
+MEDIAPIPE_DISABLE ?= 0
# NOTE: when changing any value below, you'll need to adjust WORKSPACE file by hand:
# - uncomment source build section, comment binary section
# - adjust binary version path - version variable is not passed to WORKSPACE file!
-OV_SOURCE_BRANCH ?= master
-OV_CONTRIB_BRANCH ?= master
+OV_SOURCE_BRANCH ?= releases/2023/0
+OV_CONTRIB_BRANCH ?= releases/2023/0
+
+OV_SOURCE_ORG ?= openvinotoolkit
+OV_CONTRIB_ORG ?= openvinotoolkit
+
+SENTENCEPIECE ?= 1
OV_USE_BINARY ?= 1
APT_OV_PACKAGE ?= openvino-2022.1.0
@@ -58,10 +64,15 @@ BAZEL_BUILD_TYPE ?= opt
CMAKE_BUILD_TYPE ?= Release
MINITRACE ?= OFF
+DISABLE_MEDIAPIPE_PARAMS ?= ""
+ifeq ($(MEDIAPIPE_DISABLE),1)
+ DISABLE_MEDIAPIPE_PARAMS = " --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 "
+endif
+
ifeq ($(BAZEL_BUILD_TYPE),dbg)
- BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg "
+ BAZEL_DEBUG_FLAGS=" --strip=never --copt=-g -c dbg "$(DISABLE_MEDIAPIPE_PARAMS)
else
- BAZEL_DEBUG_FLAGS=" --strip=never "
+ BAZEL_DEBUG_FLAGS=" --strip=never "$(DISABLE_MEDIAPIPE_PARAMS)
endif
ifeq ($(MINITRACE),ON)
@@ -79,18 +90,31 @@ ifeq ($(BASE_OS),ubuntu)
BASE_OS_TAG=$(BASE_OS_TAG_UBUNTU)
ifeq ($(NVIDIA),1)
BASE_IMAGE=docker.io/nvidia/cuda:11.8.0-runtime-ubuntu20.04
+ BASE_IMAGE_RELEASE=$(BASE_IMAGE)
else
BASE_IMAGE ?= ubuntu:$(BASE_OS_TAG_UBUNTU)
+ BASE_IMAGE_RELEASE=$(BASE_IMAGE)
+ endif
+ ifeq ($(BASE_OS_TAG_UBUNTU),20.04)
+ INSTALL_DRIVER_VERSION ?= "22.43.24595"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu20_2023.0.0.10926.b4452d56304_x86_64.tgz
+ else ifeq ($(BASE_OS_TAG_UBUNTU),22.04)
+ INSTALL_DRIVER_VERSION ?= "23.13.26032"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz
endif
- INSTALL_DRIVER_VERSION ?= "22.35.24055"
- DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_ubuntu20_2022.3.0.9052.9752fafe8eb_x86_64.tgz
endif
ifeq ($(BASE_OS),redhat)
BASE_OS_TAG=$(BASE_OS_TAG_REDHAT)
- BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT)
+ ifeq ($(NVIDIA),1)
+ BASE_IMAGE=docker.io/nvidia/cuda:11.8.0-runtime-ubi8
+ BASE_IMAGE_RELEASE=$(BASE_IMAGE)
+ else
+ BASE_IMAGE ?= registry.access.redhat.com/ubi8/ubi:$(BASE_OS_TAG_REDHAT)
+ BASE_IMAGE_RELEASE=registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT)
+ endif
DIST_OS=redhat
- INSTALL_DRIVER_VERSION ?= "22.28.23726"
- DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/linux/l_openvino_toolkit_rhel8_2022.3.0.9052.9752fafe8eb_x86_64.tgz
+ INSTALL_DRIVER_VERSION ?= "22.43.24595"
+ DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_rhel8_2023.0.0.10926.b4452d56304_x86_64.tgz
endif
OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server
@@ -104,7 +128,7 @@ ifeq ($(NVIDIA),1)
endif
PRODUCT_NAME = "OpenVINO Model Server"
-PRODUCT_VERSION ?= "2022.3.0.1"
+PRODUCT_VERSION ?= "2023.0.0"
OVMS_CPP_CONTAINTER_NAME ?= server-test$(shell date +%Y-%m-%d-%H.%M.%S)
OVMS_CPP_CONTAINTER_PORT ?= 9178
@@ -131,28 +155,22 @@ $(ACTIVATE):
cppclean: venv
@echo "Checking cppclean..."
- @. $(ACTIVATE); bash -c "./cppclean.sh"
+ @. $(ACTIVATE); bash -c "./ci/cppclean.sh"
style: venv clang-format-check cpplint cppclean
-sdl-check: venv
+hadolint:
@echo "Checking SDL requirements..."
@echo "Checking docker files..."
@./tests/hadolint.sh
+bandit:
@echo "Checking python files..."
- @. $(ACTIVATE); bash -c "bandit -x demos/benchmark/python -r demos/*/python > bandit.txt"
- @if ! grep -FRq "No issues identified." bandit.txt; then\
- error Run bandit on demos/*/python/*.py to fix issues.;\
- fi
- @rm bandit.txt
- @. $(ACTIVATE); bash -c "bandit -r client/python/ > bandit2.txt"
- @if ! grep -FRq "No issues identified." bandit2.txt; then\
- error Run bandit on client/python/ to fix issues.;\
- fi
- @rm bandit2.txt
+ @. $(ACTIVATE); bash -c "./ci/bandit.sh"
+
+license-headers:
@echo "Checking license headers in files..."
- @. $(ACTIVATE); bash -c "python3 lib_search.py . > missing_headers.txt"
+ @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . > missing_headers.txt"
@if ! grep -FRq "All files have headers" missing_headers.txt; then\
echo "Files with missing headers";\
cat missing_headers.txt;\
@@ -160,10 +178,12 @@ sdl-check: venv
fi
@rm missing_headers.txt
+sdl-check: venv hadolint bandit license-headers
+
@echo "Checking forbidden functions in files..."
- @. $(ACTIVATE); bash -c "python3 lib_search.py . functions > forbidden_functions.txt"
+ @. $(ACTIVATE); bash -c "python3 ./ci/lib_search.py . functions > forbidden_functions.txt"
@if ! grep -FRq "All files checked for forbidden functions" forbidden_functions.txt; then\
- error Run python3 lib_search.py . functions - to see forbidden functions file list.;\
+ error Run python3 ./ci/lib_search.py . functions - to see forbidden functions file list.;\
fi
@rm forbidden_functions.txt
@@ -192,25 +212,39 @@ ifeq ($(NVIDIA),1)
ifeq ($(OV_USE_BINARY),1)
@echo "Building NVIDIA plugin requires OV built from source. To build NVIDIA plugin and OV from source make command should look like this 'NVIDIA=1 OV_USE_BINARY=0 make docker_build'"; exit 1 ;
endif
+ ifeq ($(BASE_OS),redhat)
+ @echo "copying RH entitlements"
+ @cp -ru /etc/pki/entitlement .
+ @mkdir rhsm-ca
+ @cp -u /etc/rhsm/ca/* rhsm-ca/
+ endif
endif
-ifeq ($(BUILD_CUSTOM_NODES),true)
- @echo "Building custom nodes"
- @cd src/custom_nodes && make BASE_OS=$(BASE_OS)
+ifeq ($(BASE_OS),redhat)
+ @mkdir -p entitlement
+ @mkdir -p rhsm-ca
endif
- @echo "Building docker image $(BASE_OS)"
- # Provide metadata information into image if defined
- @mkdir -p .workspace
- @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)'
- @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})'
- @bash -c '$(eval PROJECT_VERSION:=${PRODUCT_VERSION}.${PROJECT_VER_PATCH})'
ifeq ($(NO_DOCKER_CACHE),true)
$(eval NO_CACHE_OPTION:=--no-cache)
@echo "Docker image will be rebuilt from scratch"
@docker pull $(BASE_IMAGE)
ifeq ($(BASE_OS),redhat)
@docker pull registry.access.redhat.com/ubi8/ubi-minimal:$(BASE_OS_TAG_REDHAT)
+ ifeq ($(NVIDIA),1)
+ @docker pull docker.io/nvidia/cuda:11.8.0-runtime-ubi8
+ endif
endif
endif
+ifeq ($(BUILD_CUSTOM_NODES),true)
+ @echo "Building custom nodes"
+ @cd src/custom_nodes && make NO_DOCKER_CACHE=$(NO_DOCKER_CACHE) BASE_OS=$(BASE_OS) BASE_IMAGE=$(BASE_IMAGE)
+ @cd src/custom_nodes/tokenizer && make NO_DOCKER_CACHE=$(NO_DOCKER_CACHE) BASE_OS=$(BASE_OS) BASE_IMAGE=$(BASE_IMAGE)
+endif
+ @echo "Building docker image $(BASE_OS)"
+ # Provide metadata information into image if defined
+ @mkdir -p .workspace
+ @bash -c '$(eval PROJECT_VER_PATCH:=`git rev-parse --short HEAD`)'
+ @bash -c '$(eval PROJECT_NAME:=${PRODUCT_NAME})'
+ @bash -c '$(eval PROJECT_VERSION:=${PRODUCT_VERSION}.${PROJECT_VER_PATCH})'
ifneq ($(OVMS_METADATA_FILE),)
@cp $(OVMS_METADATA_FILE) .workspace/metadata.json
else
@@ -219,8 +253,9 @@ endif
@cat .workspace/metadata.json
docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . \
--build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy=$(HTTPS_PROXY) --build-arg no_proxy=$(NO_PROXY) \
- --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" \
- --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \
+ --build-arg ovms_metadata_file=.workspace/metadata.json --build-arg ov_source_branch="$(OV_SOURCE_BRANCH)" --build-arg ov_source_org="$(OV_SOURCE_ORG)" \
+ --build-arg ov_contrib_org="$(OV_CONTRIB_ORG)" \
+ --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg DLDT_PACKAGE_URL=$(DLDT_PACKAGE_URL) \
--build-arg APT_OV_PACKAGE=$(APT_OV_PACKAGE) --build-arg CHECK_COVERAGE=$(CHECK_COVERAGE) --build-arg RUN_TESTS=$(RUN_TESTS)\
--build-arg build_type=$(BAZEL_BUILD_TYPE) --build-arg debug_bazel_flags=$(BAZEL_DEBUG_FLAGS) \
--build-arg CMAKE_BUILD_TYPE=$(CMAKE_BUILD_TYPE) \
@@ -235,7 +270,7 @@ endif
targz_package: ovms_builder_image
docker build $(NO_CACHE_OPTION) -f DockerfileMakePackage . \
--build-arg http_proxy=$(HTTP_PROXY) --build-arg https_proxy="$(HTTPS_PROXY)" \
- --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg BASE_OS=$(BASE_OS) \
+ --build-arg ov_use_binary=$(OV_USE_BINARY) --build-arg sentencepiece=$(SENTENCEPIECE) --build-arg BASE_OS=$(BASE_OS) \
--build-arg NVIDIA=$(NVIDIA) \
-t $(OVMS_CPP_DOCKER_IMAGE)-pkg:$(OVMS_CPP_IMAGE_TAG) \
--build-arg BUILD_IMAGE=$(OVMS_CPP_DOCKER_IMAGE)-build:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX)
@@ -253,7 +288,7 @@ ovms_release_image: targz_package
--build-arg no_proxy=$(NO_PROXY) \
--build-arg INSTALL_RPMS_FROM_URL="$(INSTALL_RPMS_FROM_URL)" \
--build-arg GPU=0 \
- --build-arg BASE_IMAGE=$(BASE_IMAGE) \
+ --build-arg BASE_IMAGE=$(BASE_IMAGE_RELEASE) \
--build-arg NVIDIA=$(NVIDIA) \
-t $(OVMS_CPP_DOCKER_IMAGE):$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX)
cd dist/$(DIST_OS)/ && docker build $(NO_CACHE_OPTION) -f Dockerfile.$(BASE_OS) . \
@@ -262,7 +297,7 @@ ovms_release_image: targz_package
--build-arg INSTALL_RPMS_FROM_URL="$(INSTALL_RPMS_FROM_URL)" \
--build-arg INSTALL_DRIVER_VERSION="$(INSTALL_DRIVER_VERSION)" \
--build-arg GPU=1 \
- --build-arg BASE_IMAGE=$(BASE_IMAGE) \
+ --build-arg BASE_IMAGE=$(BASE_IMAGE_RELEASE) \
--build-arg NVIDIA=$(NVIDIA) \
-t $(OVMS_CPP_DOCKER_IMAGE)-gpu:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) && \
docker tag $(OVMS_CPP_DOCKER_IMAGE)-gpu:$(OVMS_CPP_IMAGE_TAG)$(IMAGE_TAG_SUFFIX) $(OVMS_CPP_DOCKER_IMAGE):$(OVMS_CPP_IMAGE_TAG)-gpu$(IMAGE_TAG_SUFFIX)
@@ -434,6 +469,7 @@ cpu_extension:
--build-arg https_proxy=${https_proxy} \
--build-arg no_proxy=${no_proxy} \
--build-arg DLDT_PACKAGE_URL=${DLDT_PACKAGE_URL} \
- --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} .
+ --build-arg APT_OV_PACKAGE=${APT_OV_PACKAGE} \
+ --build-arg BASE_IMAGE=${BASE_IMAGE} .
mkdir -p ./lib/${BASE_OS}
docker cp $$(docker create --rm sample_cpu_extension:latest):/workspace/libcustom_relu_cpu_extension.so ./lib/${BASE_OS}
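
Taken together, the new Makefile knobs (MEDIAPIPE_DISABLE, SENTENCEPIECE, OV_SOURCE_ORG/OV_CONTRIB_ORG, the per-Ubuntu-version driver and package defaults, and BASE_IMAGE_RELEASE for the release stage) are forwarded to the Dockerfiles as build args. A few example invocations based on the variables defined above; treat them as illustrative rather than an exhaustive matrix:

    # default: binary OpenVINO 2023.0, MediaPipe and sentencepiece enabled
    make docker_build

    # drop MediaPipe: appends --define MEDIAPIPE_DISABLE=1 --cxxopt=-DMEDIAPIPE_DISABLE=1 to BAZEL_DEBUG_FLAGS
    MEDIAPIPE_DISABLE=1 make docker_build

    # NVIDIA plugin requires OpenVINO built from source, as enforced by the check above
    NVIDIA=1 OV_USE_BINARY=0 make docker_build
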
diff --git a/MakefileCapi b/MakefileCapi
index a21aefaa28..4265f665f7 100644
--- a/MakefileCapi
+++ b/MakefileCapi
@@ -13,13 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+
cpp:
- bazel build //src:ovms_shared
+ bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared
g++ src/main_capi.cpp -I/ovms/src/ -L/ovms/bazel-bin/src/ -lovms_shared -fPIC --std=c++17 -o /ovms/bazel-bin/src/capi_cpp_example
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_cpp_example
c:
- bazel build //src:ovms_shared
+ bazel build ${BAZEL_DEBUG_FLAGS} //src:ovms_shared
gcc -c src/main_capi.c -o /ovms/bazel-bin/src/main_capi.o -std=c99
gcc -o /ovms/bazel-bin/src/capi_c_example /ovms/bazel-bin/src/main_capi.o -lovms_shared -L/ovms/bazel-bin/src/
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/ovms/bazel-bin/src/ /ovms/bazel-bin/src/capi_c_example
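
MakefileCapi now takes its bazel flags from the caller instead of rebuilding //src:ovms_shared with defaults, which keeps the C API examples consistent with the main build type and the MediaPipe defines. The Dockerfiles call it as shown below; BAZEL_DEBUG_FLAGS is simply whatever debug_bazel_flags resolved to:

    make -f MakefileCapi cpp BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
    make -f MakefileCapi c   BAZEL_DEBUG_FLAGS="${debug_bazel_flags}"
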
diff --git a/README.md b/README.md
index eaa6a98ce2..4e09b059ce 100644
--- a/README.md
+++ b/README.md
@@ -1,33 +1,40 @@
# OpenVINO™ Model Server
-![OVMS picture](docs/ovms.png)
+Model Server hosts models and makes them accessible to software components over standard network protocols: a client sends a request to the model server, which performs model inference and sends a response back to the client. Model Server offers many advantages for efficient model deployment:
+- Remote inference enables using lightweight clients with only the necessary functions to perform API calls to edge or cloud deployments.
+- Applications are independent of the model framework, hardware device, and infrastructure.
+- Client applications in any programming language that supports REST or gRPC calls can be used to run inference remotely on the model server.
+- Clients require fewer updates since client libraries change very rarely.
+- Model topology and weights are not exposed directly to client applications, making it easier to control access to the model.
+- Ideal architecture for microservices-based applications and deployments in cloud environments – including Kubernetes and OpenShift clusters.
+- Efficient resource utilization with horizontal and vertical inference scaling.
-OpenVINO™ Model Server (OVMS) is a high-performance system for serving machine learning models. It is based on C++ for high scalability
-and optimized for Intel solutions, so that you can take advantage of all the power of the Intel® Xeon® processor or Intel’s AI accelerators
-and expose it over a network interface. OVMS uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving),
-while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making it easy to deploy new algorithms and AI experiments.
+![OVMS diagram](docs/ovms_diagram.png)
-Model repositories may reside on a locally accessible file system (e.g. NFS), as well as online storage compatible with
-Google Cloud Storage (GCS), Amazon S3, or Azure Blob Storage.
+OpenVINO™ Model Server (OVMS) is a high-performance system for serving models. Implemented in C++ for scalability and optimized for deployment on Intel architectures, the model server uses the same architecture and API as [TensorFlow Serving](https://github.com/tensorflow/serving) and [KServe](https://github.com/kserve/kserve) while applying OpenVINO for inference execution. Inference service is provided via gRPC or REST API, making deploying new algorithms and AI experiments easy.
+
+![OVMS picture](docs/ovms_high_level.png)
+
+The models used by the server need to be stored locally or hosted remotely by object storage services. For more details, refer to the [Preparing Model Repository](docs/models_repository.md) documentation. The model server works inside [Docker containers](docs/deploying_server.md), on [Bare Metal](docs/deploying_server.md), and in a [Kubernetes environment](docs/deploying_server.md).
+Start using OpenVINO Model Server with a fast-forward serving example from the [Quickstart guide](docs/ovms_quickstart.md) or explore [Model Server features](docs/features.md).
Read [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what’s new.
Key features:
- support for multiple frameworks, such as Caffe, TensorFlow, MXNet, PaddlePaddle and ONNX
-- online deployment of new [model versions](https://docs.openvino.ai/2022.2/ovms_docs_model_version_policy.html)
-- [configuration updates in runtime](https://docs.openvino.ai/2022.2/ovms_docs_online_config_changes.html)
+- online deployment of new [model versions](https://docs.openvino.ai/2023.0/ovms_docs_model_version_policy.html)
+- [configuration updates in runtime](https://docs.openvino.ai/2023.0/ovms_docs_online_config_changes.html)
- support for AI accelerators, such as
-[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html),
-[GPU](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html), and
-[HDDL](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_HDDL.html)
-- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html)
-- [model reshaping](https://docs.openvino.ai/2022.2/ovms_docs_shape_batch_layout.html) in runtime
-- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead
-- [custom nodes in DAG pipelines](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library
-- [serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests
-- [binary format of the input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications
-- [model caching](https://docs.openvino.ai/2022.2/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads
-- [metrics](https://docs.openvino.ai/2022.2/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
+[Intel Movidius Myriad VPUs](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_MYRIAD.html) and
+[GPU](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html)
+- works with Bare Metal Hosts as well as [Docker containers](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html)
+- [model reshaping](https://docs.openvino.ai/2023.0/ovms_docs_shape_batch_layout.html) in runtime
+- [directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html) - connecting multiple models to deploy complex processing solutions and reducing data transfer overhead
+- [custom nodes in DAG pipelines](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html) - allowing model inference and data transformations to be implemented with a custom node C/C++ dynamic library
+- [serving stateful models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html) - models that operate on sequences of data and maintain their state between inference requests
+- [binary format of the input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html) - data can be sent in JPEG or PNG formats to reduce traffic and offload the client applications
+- [model caching](https://docs.openvino.ai/2023.0/ovms_docs_model_cache.html) - cache the models on first load and re-use models from cache on subsequent loads
+- [metrics](https://docs.openvino.ai/2023.0/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
**Note:** OVMS has been tested on RedHat, and Ubuntu. The latest publicly released docker images are based on Ubuntu and UBI.
@@ -38,26 +45,26 @@ They are stored in:
## Run OpenVINO Model Server
-A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2022.2/ovms_docs_quick_start_guide.html).
+A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2023.0/ovms_docs_quick_start_guide.html).
For more information on using Model Server in various scenarios you can check the following guides:
-* [Model repository configuration](https://docs.openvino.ai/2022.2/ovms_docs_models_repository.html)
+* [Model repository configuration](https://docs.openvino.ai/2023.0/ovms_docs_models_repository.html)
-* [Deployment options](https://docs.openvino.ai/2022.3/ovms_docs_deploying_server.html)
+* [Deployment options](https://docs.openvino.ai/2023.0/ovms_docs_deploying_server.html)
-* [Performance tuning](https://docs.openvino.ai/2022.2/ovms_docs_performance_tuning.html)
+* [Performance tuning](https://docs.openvino.ai/2023.0/ovms_docs_performance_tuning.html)
-* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2022.2/ovms_docs_dag.html)
+* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2023.0/ovms_docs_dag.html)
-* [Custom nodes development](https://docs.openvino.ai/2022.2/ovms_docs_custom_node_development.html)
+* [Custom nodes development](https://docs.openvino.ai/2023.0/ovms_docs_custom_node_development.html)
-* [Serving stateful models](https://docs.openvino.ai/2022.2/ovms_docs_stateful_models.html)
+* [Serving stateful models](https://docs.openvino.ai/2023.0/ovms_docs_stateful_models.html)
* [Deploy using a Kubernetes Helm Chart](https://github.com/openvinotoolkit/operator/tree/main/helm-charts/ovms)
* [Deployment using Kubernetes Operator](https://operatorhub.io/operator/ovms-operator)
-* [Using binary input data](https://docs.openvino.ai/2022.2/ovms_docs_binary_input.html)
+* [Using binary input data](https://docs.openvino.ai/2023.0/ovms_docs_binary_input.html)
@@ -71,7 +78,7 @@ For more information on using Model Server in various scenarios you can check th
* [RESTful API](https://restfulapi.net/)
-* [Benchmarking results](https://docs.openvino.ai/2022.1/openvino_docs_performance_benchmarks_ovms.html)
+* [Benchmarking results](https://docs.openvino.ai/2023.0/openvino_docs_performance_benchmarks_ovms.html)
* [Speed and Scale AI Inference Operations Across Multiple Architectures](https://techdecoded.intel.io/essentials/speed-and-scale-ai-inference-operations-across-multiple-architectures/?elq_cid=3646480_ts1607680426276&erpm_id=6470692_ts1607680426276) - webinar recording
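The binary input feature listed above means a client can send encoded JPEG or PNG bytes and let the server do the decoding. A minimal sketch of that flow with the `ovmsclient` package covered later in this change, assuming a served `resnet` model with input `0` reachable over gRPC on port 9000:

```python
# Sketch only: model name, input name "0", port and image path are assumptions.
import ovmsclient

client = ovmsclient.make_grpc_client("localhost:9000")
with open("airliner.jpeg", "rb") as f:
    img = f.read()  # raw JPEG bytes, decoded server-side on the binary input path

results = client.predict(inputs={"0": img}, model_name="resnet")
print(results.shape)  # e.g. (1, 1000) class probabilities
```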
diff --git a/WORKSPACE b/WORKSPACE
index ce28c3b851..dcef650751 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -70,6 +70,109 @@ git_repository(
# allow all http methods
)
+########################################################### Mediapipe
+http_archive(
+ name = "com_google_protobuf",
+ sha256 = "87407cd28e7a9c95d9f61a098a53cf031109d451a7763e7dd1253abf8b4df422",
+ strip_prefix = "protobuf-3.19.1",
+ urls = ["https://github.com/protocolbuffers/protobuf/archive/v3.19.1.tar.gz"],
+ #patches = [
+ # "@//third_party:com_google_protobuf_fixes.diff"
+ #],
+ #patch_args = [
+ # "-p1",
+ #],
+)
+
+################################### Official mediapipe repository #########
+#### Will be used in a future release
+git_repository(
+ name = "mediapipe",
+ remote = "https://github.com/google/mediapipe",
+ tag = "v0.9.1",
+)
+
+# DEV mediapipe 1 source - adjust local repository path for build
+#local_repository(
+# name = "mediapipe",
+# path = "/mediapipe/",
+#)
+
+# Protobuf for Node dependencies
+http_archive(
+ name = "rules_proto_grpc",
+ sha256 = "bbe4db93499f5c9414926e46f9e35016999a4e9f6e3522482d3760dc61011070",
+ strip_prefix = "rules_proto_grpc-4.2.0",
+ urls = ["https://github.com/rules-proto-grpc/rules_proto_grpc/archive/4.2.0.tar.gz"],
+)
+
+# Node dependencies
+http_archive(
+ name = "build_bazel_rules_nodejs",
+ sha256 = "5aae76dced38f784b58d9776e4ab12278bc156a9ed2b1d9fcd3e39921dc88fda",
+ urls = ["https://github.com/bazelbuild/rules_nodejs/releases/download/5.7.1/rules_nodejs-5.7.1.tar.gz"],
+)
+
+load("@build_bazel_rules_nodejs//:repositories.bzl", "build_bazel_rules_nodejs_dependencies")
+build_bazel_rules_nodejs_dependencies()
+
+# fetches nodejs, npm, and yarn
+load("@build_bazel_rules_nodejs//:index.bzl", "node_repositories", "yarn_install")
+node_repositories()
+yarn_install(
+ name = "npm",
+ package_json = "//:package.json",
+ yarn_lock = "//:yarn.lock",
+)
+
+http_archive(
+ name = "com_google_protobuf_javascript",
+ sha256 = "35bca1729532b0a77280bf28ab5937438e3dcccd6b31a282d9ae84c896b6f6e3",
+ strip_prefix = "protobuf-javascript-3.21.2",
+ urls = ["https://github.com/protocolbuffers/protobuf-javascript/archive/refs/tags/v3.21.2.tar.gz"],
+)
+
+http_archive(
+ name = "rules_foreign_cc",
+ strip_prefix = "rules_foreign_cc-0.1.0",
+ url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.1.0.zip",
+)
+
+load("@rules_foreign_cc//:workspace_definitions.bzl", "rules_foreign_cc_dependencies")
+
+rules_foreign_cc_dependencies()
+
+# gflags needed by glog
+http_archive(
+ name = "com_github_gflags_gflags",
+ strip_prefix = "gflags-2.2.2",
+ sha256 = "19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5",
+ url = "https://github.com/gflags/gflags/archive/v2.2.2.zip",
+)
+
+git_repository(
+ name = "com_github_glog_glog",
+ remote = "https://github.com/google/glog",
+ tag = "v0.5.0",
+)
+
+load("@mediapipe//third_party:external_files.bzl", "external_files")
+external_files()
+
+new_local_repository(
+ name = "linux_openvino",
+ build_file = "@//third_party/openvino:BUILD",
+ path = "/opt/intel/openvino/runtime",
+)
+
+new_local_repository(
+ name = "linux_opencv",
+ build_file = "@//third_party/opencv:BUILD",
+ path = "/opt/opencv/",
+)
+
+########################################################### Mediapipe end
+
# minitrace
new_git_repository(
name = "minitrace",
@@ -179,7 +282,7 @@ grpc_extra_deps()
# cxxopts
http_archive(
- name = "cxxopts",
+ name = "com_github_jarro2783_cxxopts",
url = "https://github.com/jarro2783/cxxopts/archive/v2.2.0.zip",
sha256 = "f9640c00d9938bedb291a21f9287902a3a8cee38db6910b905f8eba4a6416204",
strip_prefix = "cxxopts-2.2.0",
@@ -188,7 +291,7 @@ http_archive(
# RapidJSON
http_archive(
- name = "rapidjson",
+ name = "com_github_tencent_rapidjson",
url = "https://github.com/Tencent/rapidjson/archive/v1.1.0.zip",
sha256 = "8e00c38829d6785a2dfb951bb87c6974fa07dfe488aa5b25deec4b8bc0f6a3ab",
strip_prefix = "rapidjson-1.1.0",
@@ -197,7 +300,7 @@ http_archive(
# spdlog
http_archive(
- name = "spdlog",
+ name = "com_github_gabime_spdlog",
url = "https://github.com/gabime/spdlog/archive/v1.4.0.tar.gz",
sha256 = "afd18f62d1bc466c60bef088e6b637b0284be88c515cedc59ad4554150af6043",
strip_prefix = "spdlog-1.4.0",
@@ -248,3 +351,32 @@ new_local_repository(
path = "/opt/opencv",
)
################## END OF OPENCV DEPENDENCY ##########
+
+new_git_repository(
+ name = "model_api",
+    remote = "https://github.com/openvinotoolkit/model_api/",
+ build_file_content = """
+cc_library(
+ name = "adapter_api",
+ hdrs = ["model_api/cpp/adapters/include/adapters/inference_adapter.h",],
+ includes = ["model_api/cpp/adapters/include"],
+ deps = ["@linux_openvino//:openvino"],
+ visibility = ["//visibility:public"],
+)
+ """,
+ commit = "7e163416c60ba9ccdf440c6c049d6c7e7137e144"
+)
+
+git_repository(
+ name = "oneTBB",
+ branch = "v2021.8.0",
+ remote = "https://github.com/oneapi-src/oneTBB/",
+ patch_args = ["-p1"],
+ patches = ["mwaitpkg.patch",]
+)
+
+new_local_repository(
+ name = "mediapipe_calculators",
+ build_file = "@//third_party/mediapipe_calculators:BUILD",
+ path = "/ovms/third_party/mediapipe_calculators",
+)
diff --git a/ci/Dockerfile.coverity b/ci/Dockerfile.coverity
index a87e0a455e..10fddf10ec 100644
--- a/ci/Dockerfile.coverity
+++ b/ci/Dockerfile.coverity
@@ -30,7 +30,10 @@ RUN cd $(bazel info output_base)/external/build_bazel_rules_apple/ && patch -p1
WORKDIR /ovms/
RUN /cov/bin/cov-configure --gcc --config coverity_config.xml && \
/cov/bin/cov-configure --comptype gcc --compiler /usr/bin/gcc && \
- /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc); \
+ /cov/bin/cov-build --dir cov-int bash -c 'bazel shutdown; bazel clean; bazel build --spawn_strategy=standalone //src:static_analysis && cmake /client/cpp/kserve-api && make --jobs=$(nproc) && \
+ cd /ovms/src/custom_nodes/tokenizer && \
+ mkdir build && cd build && \
+ cmake .. && make --jobs=$(nproc) && \
cd /example_cpp_client/cpp && \
bazel build --spawn_strategy=standalone //src:all'
diff --git a/ci/bandit.sh b/ci/bandit.sh
new file mode 100755
index 0000000000..14a5196f64
--- /dev/null
+++ b/ci/bandit.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+BANDIT_RESULTS="bandit.txt"
+function error_handle {
+ cat ${BANDIT_RESULTS}
+}
+function cleanup {
+ echo "Cleaning up bandit scan"
+ rm ${BANDIT_RESULTS}
+}
+trap error_handle ERR
+trap cleanup EXIT
+bandit -x demos/benchmark/python -r demos/*/python > ${BANDIT_RESULTS}
+if ! grep -FRq "No issues identified." ${BANDIT_RESULTS}; then
+ echo "Bandit scan failed for demos";
+ exit 1;
+fi
+rm ${BANDIT_RESULTS}
+bandit -r client/python/ > ${BANDIT_RESULTS}
+if ! grep -FRq "No issues identified." ${BANDIT_RESULTS}; then
+ echo "Bandit scan failed for client";
+ exit 2;
+fi
+exit 0
diff --git a/check_coverage.bat b/ci/check_coverage.bat
similarity index 87%
rename from check_coverage.bat
rename to ci/check_coverage.bat
index 2c41c30be8..c3fdea87e7 100755
--- a/check_coverage.bat
+++ b/ci/check_coverage.bat
@@ -1,12 +1,12 @@
#!/bin/bash
#Ubuntu
-#MIN_LINES_COV=76.3
-#MIN_FUNCTION_COV=88.3
+MIN_LINES_COV=76.8
+MIN_FUNCTION_COV=87.6
#Rhel
-MIN_LINES_COV=74.8
-MIN_FUNCTION_COV=75.8
+MIN_LINES_COV=75.6
+MIN_FUNCTION_COV=75.4
LINES_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | head -n 1`
FUNC_COV=`cat genhtml/index.html | grep "headerCovTableEntry.*%" | grep -oP ">\K(\d*.\d*) " | tail -n 1`
diff --git a/cppclean.sh b/ci/cppclean.sh
similarity index 58%
rename from cppclean.sh
rename to ci/cppclean.sh
index e18ad3c07d..47b7834565 100755
--- a/cppclean.sh
+++ b/ci/cppclean.sh
@@ -16,37 +16,55 @@
#
CPPCLEAN_RESULTS_FILE_SRC="cppclean_src"
CPPCLEAN_RESULTS_FILE_TEST="cppclean_test"
-cppclean ./src/ 2>&1 | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC};
-cppclean ./src/ 2>&1 | grep test > ${CPPCLEAN_RESULTS_FILE_TEST};
+cppclean ./src/ 2>&1 | grep -v "unable to find" | grep -v test > ${CPPCLEAN_RESULTS_FILE_SRC};
+cppclean ./src/ 2>&1 | grep -v "unable to find" | grep test > ${CPPCLEAN_RESULTS_FILE_TEST};
NO_WARNINGS=$(wc -l ${CPPCLEAN_RESULTS_FILE_SRC} | awk '{print $1}')
-NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}')
NO_WARNINGS_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l)
NO_WARNINGS_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l)
NO_WARNINGS_NOTUSED=$(grep " not used$" ${CPPCLEAN_RESULTS_FILE_SRC} | wc -l)
+NO_WARNINGS_TEST=$(wc -l ${CPPCLEAN_RESULTS_FILE_TEST} | awk '{print $1}')
+NO_WARNINGS_TEST_FORWARD=$(grep "use a forward declaration instead" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l)
+NO_WARNINGS_TEST_DIRECT=$(grep "not found in any directly #included header" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l)
+NO_WARNINGS_TEST_NOTUSED=$(grep " not used$" ${CPPCLEAN_RESULTS_FILE_TEST} | wc -l)
echo "Number of warnings:" ${NO_WARNINGS}
echo "Number of warnings in tests:" ${NO_WARNINGS_TEST}
echo "Number of warnings about not using forward declares:" ${NO_WARNINGS_FORWARD}
echo "Number of warnings about not direct includes:" ${NO_WARNINGS_DIRECT}
echo "Number of warnings about not used: " ${NO_WARNINGS_NOTUSED}
+echo "Number of warnings in tests about not using forward declares:" ${NO_WARNINGS_TEST_FORWARD}
+echo "Number of warnings in tests about not direct includes:" ${NO_WARNINGS_TEST_DIRECT}
+echo "Number of warnings in tests about not used: " ${NO_WARNINGS_TEST_NOTUSED}
trap "cat ${CPPCLEAN_RESULTS_FILE_SRC}" err exit
-if [ ${NO_WARNINGS_FORWARD} -gt 6 ]; then
+if [ ${NO_WARNINGS_FORWARD} -gt 7 ]; then
echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_FORWARD}";
exit 1;
fi
-if [ ${NO_WARNINGS_DIRECT} -gt 14 ]; then
+if [ ${NO_WARNINGS_DIRECT} -gt 13 ]; then
echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_DIRECT}";
exit 1;
fi
-if [ ${NO_WARNINGS_NOTUSED} -gt 3 ]; then
+if [ ${NO_WARNINGS_NOTUSED} -gt 4 ]; then
echo "Failed probably due to unnecessary forward includes: ${NO_WARNINGS_NOTUSED}";
exit 1;
fi
-if [ ${NO_WARNINGS} -gt 196 ]; then
+if [ ${NO_WARNINGS_TEST_FORWARD} -gt 1 ]; then
+ echo "Failed due to not using forward declarations where possible: ${NO_WARNINGS_TEST_FORWARD}";
+ exit 1;
+fi
+if [ ${NO_WARNINGS_TEST_DIRECT} -gt 12 ]; then
+ echo "Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_TEST_DIRECT}";
+ exit 1;
+fi
+if [ ${NO_WARNINGS_TEST_NOTUSED} -gt 0 ]; then
+ echo "Failed probably due to unnecessary forward includes: ${NO_WARNINGS_TEST_NOTUSED}";
+ exit 1;
+fi
+if [ ${NO_WARNINGS} -gt 159 ]; then
echo "Failed due to higher than allowed number of issues in code: ${NO_WARNINGS}"
exit 1
fi
-if [ ${NO_WARNINGS_TEST} -gt 128 ]; then
+if [ ${NO_WARNINGS_TEST} -gt 55 ]; then
echo "Failed due to higher than allowed number of issues in test code: ${NO_WARNINGS_TEST}"
cat ${CPPCLEAN_RESULTS_FILE_TEST}
exit 1
diff --git a/lib_search.py b/ci/lib_search.py
similarity index 96%
rename from lib_search.py
rename to ci/lib_search.py
index c6bc38ba8e..ab6bf6b24a 100644
--- a/lib_search.py
+++ b/ci/lib_search.py
@@ -90,6 +90,7 @@ def check_dir(start_dir):
'cppclean_test',
'docx',
'dummy.xml',
+ 'passthrough.xml',
'forbidden_functions.txt',
'gif',
'increment_1x3x4x5.xml',
@@ -114,6 +115,11 @@ def check_dir(start_dir):
'bazel_rules_apple.patch',
"pom.xml",
"go.sum",
+ "mwaitpkg.patch",
+ 'saved_model.pb',
+ "yarn.lock",
+ "BUILD.bazel",
+ "package.json",
]
exclude_directories = ['/dist/', 'extras/ovms-operator', 'extras/openvino-operator-openshift', 'release_files/thirdparty-licenses']
@@ -169,6 +175,7 @@ def check_func(start_dir):
'client_requirements.txt',
'docx',
'dummy.xml',
+ 'passthrough.xml',
'forbidden_functions.txt',
'input_images.txt',
'libevent/BUILD',
@@ -189,6 +196,9 @@ def check_func(start_dir):
'tftext.patch',
'zlib.LICENSE.txt',
'bazel_rules_apple.patch',
+ 'yarn.lock',
+ 'BUILD.bazel',
+ 'package.json',
]
exclude_directories = ['/dist/', 'extras/ovms-operator']
diff --git a/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp b/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp
index 1a18c121a8..244591623d 100644
--- a/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp
+++ b/client/cpp/kserve-api/samples/grpc_async_infer_resnet.cpp
@@ -62,7 +62,7 @@ namespace tc = triton::client;
} \
}
-std::vector<uint8_t> load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
file.unsetf(std::ios::skipws);
std::streampos fileSize;
@@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
-    std::vector<uint8_t> vec;
-    vec.reserve(fileSize);
-
-    vec.insert(vec.begin(),
-        std::istream_iterator<uint8_t>(file),
-        std::istream_iterator<uint8_t>());
- return vec;
+ return oss.str();
}
+
int main(int argc, char** argv) {
cxxopts::Options opt("grpc_async_infer_resnet", "Sends requests via KServe gRPC API.");
@@ -151,14 +148,19 @@ int main(int argc, char** argv) {
    std::vector<int64_t> shape{1};
- // Initialize the inputs with the data.
- tc::InferInput* input;
+    std::vector<tc::InferInput*> inputs;
+    std::vector<std::shared_ptr<tc::InferInput>> input_ptrs;
+ for (int i = 0; i < imgs.size(); i++) {
+ tc::InferInput* input;
+ inputs.push_back(input);
- FAIL_IF_ERR(
- tc::InferInput::Create(&input, input_name, shape, "BYTES"),
- "unable to get input");
-    std::shared_ptr<tc::InferInput> input_ptr;
- input_ptr.reset(input);
+ FAIL_IF_ERR(
+ tc::InferInput::Create(&input, input_name, shape, "BYTES"),
+ "unable to get input");
+        std::shared_ptr<tc::InferInput> input_ptr;
+ input_ptr.reset(input);
+ input_ptrs.push_back(input_ptr);
+ }
tc::InferOptions options(model_name);
if (args.count("model_version"))
@@ -169,7 +171,6 @@ int main(int argc, char** argv) {
std::cerr << "The provided argument is of a wrong type" << std::endl;
return 1;
}
-    std::vector<tc::InferInput*> inputs = {input_ptr.get()};
    std::vector<std::string> classes;
    std::ifstream lb_f(args["labels_list"].as<std::string>());
@@ -178,7 +179,7 @@ int main(int argc, char** argv) {
classes.push_back(tmp);
}
-    std::vector<std::vector<uint8_t>> input_data;
+    std::vector<std::string> input_data;
input_data.reserve(imgs.size());
for (int i = 0; i < imgs.size(); i++) {
try {
@@ -196,8 +197,9 @@ int main(int argc, char** argv) {
int completedRequestCount = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < imgs.size(); i++) {
+        std::vector<tc::InferInput*> inputs = {input_ptrs[i].get()};
FAIL_IF_ERR(
- input_ptr->AppendRaw(input_data[i]),
+ input_ptrs[i]->AppendFromString({input_data[i]}),
"unable to set data for input");
client->AsyncInfer(
[&, i](tc::InferResult* result) -> int {
@@ -231,7 +233,6 @@ int main(int argc, char** argv) {
return 0;
},
options, inputs);
- input->Reset();
}
{
        std::unique_lock<std::mutex> lk(mtx);
diff --git a/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp b/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp
index 0c5d7ad626..3d27afe17b 100644
--- a/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp
+++ b/client/cpp/kserve-api/samples/grpc_infer_resnet.cpp
@@ -59,7 +59,7 @@ namespace tc = triton::client;
} \
}
-std::vector<uint8_t> load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
file.unsetf(std::ios::skipws);
std::streampos fileSize;
@@ -67,14 +67,10 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
-    std::vector<uint8_t> vec;
-    vec.reserve(fileSize);
-
-    vec.insert(vec.begin(),
-        std::istream_iterator<uint8_t>(file),
-        std::istream_iterator<uint8_t>());
- return vec;
+ return oss.str();
}
int main(int argc, char** argv) {
@@ -171,7 +167,7 @@ int main(int argc, char** argv) {
    std::vector<tc::InferResult*> results;
results.resize(imgs.size());
for (int i = 0; i < imgs.size(); i++) {
-        std::vector<uint8_t> input_data;
+        std::string input_data;
try {
input_data = load(imgs[i]);
}
@@ -180,7 +176,7 @@ int main(int argc, char** argv) {
return 1;
}
FAIL_IF_ERR(
- input_ptr->AppendRaw(input_data),
+ input_ptr->AppendFromString({input_data}),
"unable to set data for input");
FAIL_IF_ERR(
client->Infer(&(results[i]), options, inputs),
diff --git a/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp
index d9d0f61f0b..cafecf85ba 100644
--- a/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp
+++ b/client/cpp/kserve-api/samples/http_async_infer_resnet.cpp
@@ -62,7 +62,7 @@ namespace tc = triton::client;
} \
}
-std::vector<uint8_t> load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
file.unsetf(std::ios::skipws);
std::streampos fileSize;
@@ -70,16 +70,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
-    std::vector<uint8_t> vec;
-    vec.reserve(fileSize);
-
-    vec.insert(vec.begin(),
-        std::istream_iterator<uint8_t>(file),
-        std::istream_iterator<uint8_t>());
- return vec;
+ return oss.str();
}
+
int main(int argc, char** argv) {
cxxopts::Options opt("http_async_infer_resnet", "Sends requests via KServe REST API.");
@@ -151,14 +148,19 @@ int main(int argc, char** argv) {
    std::vector<int64_t> shape{1};
- // Initialize the inputs with the data.
- tc::InferInput* input;
+    std::vector<tc::InferInput*> inputs;
+    std::vector<std::shared_ptr<tc::InferInput>> input_ptrs;
+ for (int i = 0; i < imgs.size(); i++) {
+ tc::InferInput* input;
+ inputs.push_back(input);
- FAIL_IF_ERR(
- tc::InferInput::Create(&input, input_name, shape, "BYTES"),
- "unable to get input");
-    std::shared_ptr<tc::InferInput> input_ptr;
- input_ptr.reset(input);
+ FAIL_IF_ERR(
+ tc::InferInput::Create(&input, input_name, shape, "BYTES"),
+ "unable to get input");
+        std::shared_ptr<tc::InferInput> input_ptr;
+ input_ptr.reset(input);
+ input_ptrs.push_back(input_ptr);
+ }
tc::InferOptions options(model_name);
if (args.count("model_version"))
@@ -169,7 +171,6 @@ int main(int argc, char** argv) {
std::cerr << "The provided argument is of a wrong type" << std::endl;
return 1;
}
-    std::vector<tc::InferInput*> inputs = {input_ptr.get()};
    std::vector<std::string> classes;
    std::ifstream lb_f(args["labels_list"].as<std::string>());
@@ -187,7 +188,7 @@ int main(int argc, char** argv) {
output_ptr.reset(output);
    std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
-    std::vector<std::vector<uint8_t>> input_data;
+    std::vector<std::string> input_data;
input_data.reserve(imgs.size());
for (int i = 0; i < imgs.size(); i++) {
try {
@@ -204,8 +205,9 @@ int main(int argc, char** argv) {
int completedRequestCount = 0;
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < imgs.size(); i++) {
+        std::vector<tc::InferInput*> inputs = {input_ptrs[i].get()};
FAIL_IF_ERR(
- input_ptr->AppendRaw(input_data[i]),
+ input_ptrs[i]->AppendFromString({input_data[i]}),
"unable to set data for input");
client->AsyncInfer(
[&, i](tc::InferResult* result) -> int {
@@ -239,7 +241,6 @@ int main(int argc, char** argv) {
return 0;
},
options, inputs, outputs);
- input->Reset();
}
{
        std::unique_lock<std::mutex> lk(mtx);
diff --git a/client/cpp/kserve-api/samples/http_infer_resnet.cpp b/client/cpp/kserve-api/samples/http_infer_resnet.cpp
index ad02374db3..ba21b0da4c 100644
--- a/client/cpp/kserve-api/samples/http_infer_resnet.cpp
+++ b/client/cpp/kserve-api/samples/http_infer_resnet.cpp
@@ -59,7 +59,7 @@ namespace tc = triton::client;
} \
}
-std::vector<uint8_t> load(const std::string& fileName) {
+std::string load(const std::string& fileName) {
std::ifstream file(fileName, std::ios::binary);
file.unsetf(std::ios::skipws);
std::streampos fileSize;
@@ -67,16 +67,13 @@ std::vector load(const std::string& fileName) {
file.seekg(0, std::ios::end);
fileSize = file.tellg();
file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
-    std::vector<uint8_t> vec;
-    vec.reserve(fileSize);
-
-    vec.insert(vec.begin(),
-        std::istream_iterator<uint8_t>(file),
-        std::istream_iterator<uint8_t>());
- return vec;
+ return oss.str();
}
+
int main(int argc, char** argv) {
cxxopts::Options opt("http_infer_resnet", "Sends requests via KServe REST API.");
@@ -181,7 +178,7 @@ int main(int argc, char** argv) {
    std::vector<tc::InferResult*> results;
results.resize(imgs.size());
for (int i = 0; i < imgs.size(); i++) {
-        std::vector<uint8_t> input_data;
+        std::string input_data;
try {
input_data = load(imgs[i]);
}
@@ -190,7 +187,7 @@ int main(int argc, char** argv) {
return 1;
}
FAIL_IF_ERR(
- input_ptr->AppendRaw(input_data),
+ input_ptr->AppendFromString({input_data}),
"unable to set data for input");
FAIL_IF_ERR(
client->Infer(&(results[i]), options, inputs, outputs),
diff --git a/client/go/kserve-api/grpc_infer_resnet.go b/client/go/kserve-api/grpc_infer_resnet.go
index 547b87a344..22af02b6ab 100644
--- a/client/go/kserve-api/grpc_infer_resnet.go
+++ b/client/go/kserve-api/grpc_infer_resnet.go
@@ -84,13 +84,20 @@ func ModelInferRequest(client grpc_client.GRPCInferenceServiceClient, fileName s
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
+ bytes, err := ioutil.ReadFile(fileName)
+ contents := grpc_client.InferTensorContents{}
+ contents.BytesContents = append(contents.BytesContents, bytes)
+
+ inferInput := grpc_client.ModelInferRequest_InferInputTensor{
+ Name: "0",
+ Datatype: "BYTES",
+ Shape: []int64{1},
+ Contents: &contents,
+ }
+
// Create request input tensors
inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
- &grpc_client.ModelInferRequest_InferInputTensor{
- Name: "0",
- Datatype: "BYTES",
- Shape: []int64{1},
- },
+ &inferInput,
}
// Create inference request for specific model/version
@@ -100,11 +107,6 @@ func ModelInferRequest(client grpc_client.GRPCInferenceServiceClient, fileName s
Inputs: inferInputs,
}
- bytes, err := ioutil.ReadFile(fileName)
-
-
- modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes)
-
// Submit inference request to server
modelInferResponse, err := client.ModelInfer(ctx, &modelInferRequest)
if err != nil {
diff --git a/client/java/kserve-api/README.md b/client/java/kserve-api/README.md
index 647d6983f8..86f8184eb2 100644
--- a/client/java/kserve-api/README.md
+++ b/client/java/kserve-api/README.md
@@ -225,7 +225,7 @@ usage: grpc_infer_resnet [OPTION]...
- Usage Example
```Bash
-java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./common/resnet_input_images.txt -lbs ../../../common/resnet_labels.txt --grpc_port 9000
+java -cp target/grpc-client.jar clients.grpc_infer_resnet -imgs ./resnet_input_images.txt -lbs ../../common/resnet_labels.txt --grpc_port 9000
../../../demos/common/static/images/airliner.jpeg classified as 404 airliner
../../../demos/common/static/images/arctic-fox.jpeg classified as 279 Arctic fox, white fox, Alopex lagopus
../../../demos/common/static/images/bee.jpeg classified as 309 bee
diff --git a/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java b/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java
index 8b65923e45..be2e1b2b52 100644
--- a/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java
+++ b/client/java/kserve-api/src/main/java/clients/grpc_infer_resnet.java
@@ -34,6 +34,7 @@
import com.google.protobuf.ByteString;
+import inference.GrpcPredictV2.InferTensorContents;
import inference.GRPCInferenceServiceGrpc;
import inference.GRPCInferenceServiceGrpc.GRPCInferenceServiceBlockingStub;
import inference.GrpcPredictV2.ModelInferRequest;
@@ -102,8 +103,6 @@ public static void main(String[] args) {
input.setDatatype("BYTES");
input.addShape(1);
- request.addInputs(0, input);
-
        List<String> labels = new ArrayList<>();
try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("lbs"))) {
try (Scanner sc = new Scanner(fis)) {
@@ -118,13 +117,15 @@ public static void main(String[] args) {
try (FileInputStream fis = new FileInputStream(cmd.getOptionValue("imgs"))) {
try (Scanner sc = new Scanner(fis)) {
while (sc.hasNext()) {
-
+ request.clearInputs();
String[] line = sc.nextLine().split(" ");
String fileName = line[0];
int label = Integer.parseInt(line[1]);
FileInputStream imageStream = new FileInputStream(fileName);
- request.clearRawInputContents();
- request.addRawInputContents(ByteString.readFrom(imageStream));
+ InferTensorContents.Builder input_data = InferTensorContents.newBuilder();
+ input_data.addBytesContents(ByteString.readFrom(imageStream));
+ input.setContents(input_data);
+ request.addInputs(0, input);
ModelInferResponse response = grpc_stub.modelInfer(request.build());
diff --git a/client/python/kserve-api/samples/README.md b/client/python/kserve-api/samples/README.md
index 207d4d6e12..bc2ad7916e 100644
--- a/client/python/kserve-api/samples/README.md
+++ b/client/python/kserve-api/samples/README.md
@@ -329,9 +329,8 @@ Classification accuracy: 100.00
```Bash
python3 grpc_infer_binary_resnet.py --help
-usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--grpc_address GRPC_ADDRESS]
- [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
- [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME]
+usage: grpc_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--grpc_address GRPC_ADDRESS] [--grpc_port GRPC_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
+ [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls]
Sends requests via KServe gRPC API using images in format supported by OpenCV. It displays performance statistics and optionally the model accuracy
@@ -339,8 +338,6 @@ optional arguments:
-h, --help show this help message and exit
--images_list IMAGES_LIST
path to a file with a list of labeled images
- --labels_numpy_path LABELS_NUMPY_PATH
- numpy in shape [n,1] - can be used to check model accuracy
--grpc_address GRPC_ADDRESS
Specify url to grpc service. default:localhost
--grpc_port GRPC_PORT
@@ -355,12 +352,13 @@ optional arguments:
Define model name, must be same as is in service. default: resnet
--pipeline_name PIPELINE_NAME
Define pipeline name, must be same as is in service
+ --tls use TLS communication with GRPC endpoint
```
- Usage Example
```Bash
-python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet
+python3 grpc_infer_binary_resnet.py --grpc_port 9000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet
Start processing:
Model name: resnet
Iteration 0; Processing time: 27.09 ms; speed 36.92 fps
@@ -794,9 +792,8 @@ Classification accuracy: 100.00
```Bash
python3 ./http_infer_binary_resnet.py --help
-usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--labels_numpy_path LABELS_NUMPY_PATH] [--http_address HTTP_ADDRESS]
- [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
- [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME]
+usage: http_infer_binary_resnet.py [-h] [--images_list IMAGES_LIST] [--http_address HTTP_ADDRESS] [--http_port HTTP_PORT] [--input_name INPUT_NAME] [--output_name OUTPUT_NAME] [--batchsize BATCHSIZE]
+ [--model_name MODEL_NAME] [--pipeline_name PIPELINE_NAME] [--tls] [--server_cert SERVER_CERT] [--client_cert CLIENT_CERT] [--client_key CLIENT_KEY]
Sends requests via KServe REST API using binary encoded images. It displays performance statistics and optionally the model accuracy
@@ -804,8 +801,6 @@ optional arguments:
-h, --help show this help message and exit
--images_list IMAGES_LIST
path to a file with a list of labeled images
- --labels_numpy_path LABELS_NUMPY_PATH
- numpy in shape [n,1] - can be used to check model accuracy
--http_address HTTP_ADDRESS
Specify url to http service. default:localhost
--http_port HTTP_PORT
@@ -820,12 +815,19 @@ optional arguments:
Define model name, must be same as is in service. default: resnet
--pipeline_name PIPELINE_NAME
Define pipeline name, must be same as is in service
+ --tls use TLS communication with HTTP endpoint
+ --server_cert SERVER_CERT
+ Path to server certificate
+ --client_cert CLIENT_CERT
+ Path to client certificate
+ --client_key CLIENT_KEY
+ Path to client key
```
- Usage Example
```Bash
-python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --labels_numpy_path ../../lbs.npy --input_name 0 --output_name 1463 --model_name resnet
+python3 ./http_infer_binary_resnet.py --http_port 8000 --images_list ../../resnet_input_images.txt --input_name 0 --output_name 1463 --model_name resnet
Start processing:
Model name: resnet
Iteration 0; Processing time: 38.61 ms; speed 25.90 fps
diff --git a/client/python/kserve-api/samples/grpc_infer_binary_resnet.py b/client/python/kserve-api/samples/grpc_infer_binary_resnet.py
index 85a3948119..5346ac94d4 100644
--- a/client/python/kserve-api/samples/grpc_infer_binary_resnet.py
+++ b/client/python/kserve-api/samples/grpc_infer_binary_resnet.py
@@ -17,59 +17,19 @@
import sys
sys.path.append("../../../../demos/common/python")
-import ast
-import grpc
import numpy as np
import classes
import datetime
import argparse
from client_utils import print_statistics
-from tritonclient.grpc import service_pb2, service_pb2_grpc
-from tritonclient.utils import *
-
-DataTypeToContentsFieldName = {
- 'BOOL' : 'bool_contents',
- 'BYTES' : 'bytes_contents',
- 'FP32' : 'fp32_contents',
- 'FP64' : 'fp64_contents',
- 'INT64' : 'int64_contents',
- 'INT32' : 'int_contents',
- 'UINT64' : 'uint64_contents',
- 'UINT32' : 'uint_contents',
- 'INT64' : 'int64_contents',
- 'INT32' : 'int_contents',
-}
-
-def as_numpy(response, name):
- index = 0
- for output in response.outputs:
- if output.name == name:
- shape = []
- for value in output.shape:
- shape.append(value)
- datatype = output.datatype
- field_name = DataTypeToContentsFieldName[datatype]
- contents = getattr(output, "contents")
- contents = getattr(contents, f"{field_name}")
- if index < len(response.raw_output_contents):
- np_array = np.frombuffer(
- response.raw_output_contents[index], dtype=triton_to_np_dtype(output.datatype))
- elif len(contents) != 0:
- np_array = np.array(contents,
- copy=False)
- else:
- np_array = np.empty(0)
- np_array = np_array.reshape(shape)
- return np_array
- else:
- index += 1
- return None
+
+import tritonclient.grpc as grpclient
+
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Sends requests via KServe gRPC API using images in format supported by OpenCV. '
'It displays performance statistics and optionally the model accuracy')
parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images')
- parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy')
parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. default: input')
@@ -82,10 +42,14 @@ def as_numpy(response, name):
dest='model_name')
parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service',
dest='pipeline_name')
+ parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with GRPC endpoint')
+ error = False
args = vars(parser.parse_args())
address = "{}:{}".format(args['grpc_address'],args['grpc_port'])
+ input_name = args['input_name']
+ output_name = args['output_name']
processing_times = np.zeros((0),int)
@@ -96,10 +60,6 @@ def as_numpy(response, name):
while batch_size > len(lines):
lines += lines
- if args.get('labels_numpy_path') is not None:
- lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False)
- matched_count = 0
- total_executed = 0
batch_size = int(args.get('batchsize'))
print('Start processing:')
@@ -108,76 +68,79 @@ def as_numpy(response, name):
iteration = 0
is_pipeline_request = bool(args.get('pipeline_name'))
- # Create gRPC stub for communicating with the server
- channel = grpc.insecure_channel(address)
- grpc_stub = service_pb2_grpc.GRPCInferenceServiceStub(channel)
+ model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
+
+ try:
+ triton_client = grpclient.InferenceServerClient(
+ url=address,
+ ssl=args['tls'],
+ verbose=False)
+ except Exception as e:
+ print("context creation failed: " + str(e))
+ sys.exit()
+
+ processing_times = np.zeros((0),int)
+ total_executed = 0
+ matched_count = 0
batch_i = 0
image_data = []
+ image_binary_size = []
labels = []
for line in lines:
+ inputs = []
batch_i += 1
path, label = line.strip().split(" ")
with open(path, 'rb') as f:
image_data.append(f.read())
- labels.append(label)
+ labels.append(int(label))
if batch_i < batch_size:
continue
- inputs = []
- inputs.append(service_pb2.ModelInferRequest().InferInputTensor())
- inputs[0].name = args['input_name']
- inputs[0].datatype = "BYTES"
- inputs[0].shape.extend([1])
- inputs[0].contents.bytes_contents.append(image_data[0])
-
+ inputs.append(grpclient.InferInput(args['input_name'], [batch_i], "BYTES"))
outputs = []
- outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor())
- outputs[0].name = "prob"
-
- request = service_pb2.ModelInferRequest()
- request.model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
- request.inputs.extend(inputs)
-
+ outputs.append(grpclient.InferRequestedOutput(output_name))
+
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
start_time = datetime.datetime.now()
- request.outputs.extend(outputs)
- response = grpc_stub.ModelInfer(request)
+ results = triton_client.infer(model_name=model_name,
+ inputs=inputs,
+ outputs=outputs)
end_time = datetime.datetime.now()
-
duration = (end_time - start_time).total_seconds() * 1000
processing_times = np.append(processing_times,np.array([int(duration)]))
- output = as_numpy(response, args['output_name'])
+ output = results.as_numpy(output_name)
nu = np.array(output)
# for object classification models show imagenet class
print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2),
- round(1000 * batch_size / np.average(duration), 2)
- ))
+ round(1000 * batch_size / np.average(duration), 2)
+ ))
# Comment out this section for non imagenet datasets
print("imagenet top results in a single batch:")
for i in range(nu.shape[0]):
- lbs_i = iteration * batch_size
- single_result = nu[[i],...]
- offset = 0
- if nu.shape[1] == 1001:
- offset = 1
- ma = np.argmax(single_result) - offset
+ if is_pipeline_request:
+ # shape (1,)
+ print("response shape", output.shape)
+                ma = nu[0] - 1  # indexes need to be shifted left due to the 1x1001 shape
+ else:
+ # shape (1,1000)
+ single_result = nu[[i],...]
+ offset = 0
+ if nu.shape[1] == 1001:
+ offset = 1
+ ma = np.argmax(single_result) - offset
mark_message = ""
- if args.get('labels_numpy_path') is not None:
- total_executed += 1
- if ma == lbs[lbs_i + i]:
- matched_count += 1
- mark_message = "; Correct match."
- else:
- mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] )
- print("\t",i, classes.imagenet_classes[ma],ma, mark_message)
-
+ total_executed += 1
+ if ma == labels[i]:
+ matched_count += 1
+ mark_message = "; Correct match."
+ else:
+                mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]])
+ print("\t", i, classes.imagenet_classes[ma], ma, mark_message)
# Comment out this section for non imagenet datasets
- iteration += 1
- image_data = []
labels = []
+ image_data = []
batch_i = 0
print_statistics(processing_times, batch_size)
-
- if args.get('labels_numpy_path') is not None:
- print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
-
+ print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
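The rewrite above swaps the hand-built `service_pb2` requests for the higher-level `tritonclient.grpc` API. Condensed to a single image, the BYTES round trip the sample now performs looks roughly like this (input `0`, output `1463`, model `resnet` and the port follow the sample defaults; the image path is a placeholder):

```python
import numpy as np
import tritonclient.grpc as grpclient

client = grpclient.InferenceServerClient(url="localhost:9000", ssl=False, verbose=False)

with open("airliner.jpeg", "rb") as f:
    img = f.read()  # encoded JPEG sent as a single BYTES element

inputs = [grpclient.InferInput("0", [1], "BYTES")]
inputs[0].set_data_from_numpy(np.array([img], dtype=np.object_))
outputs = [grpclient.InferRequestedOutput("1463")]

results = client.infer(model_name="resnet", inputs=inputs, outputs=outputs)
probs = results.as_numpy("1463")  # (1, 1000) scores for the resnet model
print(int(np.argmax(probs)))      # predicted imagenet class id
```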
diff --git a/client/python/kserve-api/samples/http_infer_binary_resnet.py b/client/python/kserve-api/samples/http_infer_binary_resnet.py
index 4e1a213b42..b3fb25f700 100644
--- a/client/python/kserve-api/samples/http_infer_binary_resnet.py
+++ b/client/python/kserve-api/samples/http_infer_binary_resnet.py
@@ -22,15 +22,14 @@
import datetime
import argparse
from client_utils import print_statistics
-import requests
-import json
+
+import tritonclient.http as httpclient
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Sends requests via KServe REST API using binary encoded images. '
'It displays performance statistics and optionally the model accuracy')
parser.add_argument('--images_list', required=False, default='input_images.txt', help='path to a file with a list of labeled images')
- parser.add_argument('--labels_numpy_path', required=False, help='numpy in shape [n,1] - can be used to check model accuracy')
parser.add_argument('--http_address',required=False, default='localhost', help='Specify url to http service. default:localhost')
parser.add_argument('--http_port',required=False, default=8000, help='Specify port to http service. default: 8000')
parser.add_argument('--input_name',required=False, default='input', help='Specify input tensor name. default: input')
@@ -43,6 +42,10 @@
dest='model_name')
parser.add_argument('--pipeline_name', default='', help='Define pipeline name, must be same as is in service',
dest='pipeline_name')
+ parser.add_argument('--tls', default=False, action='store_true', help='use TLS communication with HTTP endpoint')
+ parser.add_argument('--server_cert', required=False, help='Path to server certificate', default=None)
+ parser.add_argument('--client_cert', required=False, help='Path to client certificate', default=None)
+ parser.add_argument('--client_key', required=False, help='Path to client key', default=None)
error = False
args = vars(parser.parse_args())
@@ -51,6 +54,15 @@
input_name = args['input_name']
output_name = args['output_name']
+ if args['tls']:
+ ssl_options = {
+ 'keyfile':args['client_key'],
+ 'cert_file':args['client_cert'],
+ 'ca_certs':args['server_cert']
+ }
+ else:
+ ssl_options = None
+
processing_times = np.zeros((0),int)
input_images = args.get('images_list')
@@ -60,10 +72,6 @@
while batch_size > len(lines):
lines += lines
- if args.get('labels_numpy_path') is not None:
- lbs = np.load(args['labels_numpy_path'], mmap_mode='r', allow_pickle=False)
- matched_count = 0
- total_executed = 0
batch_size = int(args.get('batchsize'))
print('Start processing:')
@@ -74,72 +82,79 @@
model_name = args.get('pipeline_name') if is_pipeline_request else args.get('model_name')
- url = f"http://{address}/v2/models/{model_name}/infer"
- http_session = requests.session()
+ try:
+ triton_client = httpclient.InferenceServerClient(
+ url=address,
+ ssl=args['tls'],
+ ssl_options=ssl_options,
+ verbose=False)
+ except Exception as e:
+ print("context creation failed: " + str(e))
+ sys.exit()
+ processing_times = np.zeros((0),int)
+
+ total_executed = 0
+ matched_count = 0
batch_i = 0
image_data = []
image_binary_size = []
labels = []
for line in lines:
+ inputs = []
batch_i += 1
path, label = line.strip().split(" ")
with open(path, 'rb') as f:
image_data.append(f.read())
- image_binary_size.append(len(image_data[-1]))
- labels.append(label)
+ labels.append(int(label))
if batch_i < batch_size:
continue
- image_binary_size_str = ",".join(map(str, image_binary_size))
- inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]}
- inference_header_binary = json.dumps(inference_header).encode()
-
+ inputs.append(httpclient.InferInput(args['input_name'], [batch_i], "BYTES"))
+ outputs = []
+ outputs.append(httpclient.InferRequestedOutput(output_name, binary_data=True))
+
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
start_time = datetime.datetime.now()
- results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))})
+ results = triton_client.infer(model_name=model_name,
+ inputs=inputs,
+ outputs=outputs)
end_time = datetime.datetime.now()
duration = (end_time - start_time).total_seconds() * 1000
processing_times = np.append(processing_times,np.array([int(duration)]))
- results_dict = json.loads(results.text)
- if "error" in results_dict.keys():
- print(f"Error: {results_dict['error']}")
- error = True
- break
-
- output = np.array(json.loads(results.text)['outputs'][0]['data'])
- output_shape = tuple(results_dict['outputs'][0]['shape'])
- nu = np.reshape(output, output_shape)
+ output = results.as_numpy(output_name)
+ nu = np.array(output)
# for object classification models show imagenet class
print('Iteration {}; Processing time: {:.2f} ms; speed {:.2f} fps'.format(iteration,round(np.average(duration), 2),
- round(1000 * batch_size / np.average(duration), 2)
- ))
+ round(1000 * batch_size / np.average(duration), 2)
+ ))
# Comment out this section for non imagenet datasets
print("imagenet top results in a single batch:")
for i in range(nu.shape[0]):
- lbs_i = iteration * batch_size
- single_result = nu[[i],...]
- offset = 0
- if nu.shape[1] == 1001:
- offset = 1
- ma = np.argmax(single_result) - offset
+ if is_pipeline_request:
+ # shape (1,)
+ print("response shape", output.shape)
+                ma = nu[0] - 1  # indexes need to be shifted left due to the 1x1001 shape
+ else:
+ # shape (1,1000)
+ single_result = nu[[i],...]
+ offset = 0
+ if nu.shape[1] == 1001:
+ offset = 1
+ ma = np.argmax(single_result) - offset
mark_message = ""
- if args.get('labels_numpy_path') is not None:
- total_executed += 1
- if ma == lbs[lbs_i + i]:
- matched_count += 1
- mark_message = "; Correct match."
- else:
- mark_message = "; Incorrect match. Should be {} {}".format(lbs[lbs_i + i], classes.imagenet_classes[lbs[lbs_i + i]] )
- print("\t",i, classes.imagenet_classes[ma],ma, mark_message)
+ total_executed += 1
+ if ma == labels[i]:
+ matched_count += 1
+ mark_message = "; Correct match."
+ else:
+                mark_message = "; Incorrect match. Should be {} {}".format(labels[i], classes.imagenet_classes[labels[i]])
+ print("\t", i, classes.imagenet_classes[ma], ma, mark_message)
# Comment out this section for non imagenet datasets
- iteration += 1
- image_data = []
- image_binary_size = []
labels = []
+ image_data = []
batch_i = 0
- if not error:
- print_statistics(processing_times, batch_size)
-
- if args.get('labels_numpy_path') is not None:
- print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
-
+ print_statistics(processing_times, batch_size)
+ print('Classification accuracy: {:.2f}'.format(100*matched_count/total_executed))
+
\ No newline at end of file
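The HTTP sample gains `--tls`, `--server_cert`, `--client_cert` and `--client_key` options that are forwarded to `tritonclient.http` as an `ssl_options` dictionary. A stripped-down sketch of that client construction, with placeholder certificate paths:

```python
import tritonclient.http as httpclient

# Placeholder paths; the key names mirror the ssl_options dict built in the sample.
ssl_options = {
    "keyfile": "client.key",
    "cert_file": "client.crt",
    "ca_certs": "server.pem",
}

client = httpclient.InferenceServerClient(
    url="localhost:8000",
    ssl=True,
    ssl_options=ssl_options,
    verbose=False)
print(client.is_server_ready())
```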
diff --git a/client/python/kserve-api/samples/requirements.txt b/client/python/kserve-api/samples/requirements.txt
index 42e7cf9c8f..222b7adf1b 100644
--- a/client/python/kserve-api/samples/requirements.txt
+++ b/client/python/kserve-api/samples/requirements.txt
@@ -1,4 +1,4 @@
protobuf==3.19.5
tritonclient[all]==2.22.3
-requests==2.27.1
+requests==2.31.0
grpcio
diff --git a/client/python/ovmsclient/lib/Makefile b/client/python/ovmsclient/lib/Makefile
index 6c87be0844..6da873c155 100644
--- a/client/python/ovmsclient/lib/Makefile
+++ b/client/python/ovmsclient/lib/Makefile
@@ -18,7 +18,7 @@ VIRTUALENV_EXE := python3 -m virtualenv -p python3
VIRTUALENV_DIR := .venv-ovmsclient
ACTIVATE="$(VIRTUALENV_DIR)/bin/activate"
-PACKAGE_PATH := dist/ovmsclient-2022.3-py3-none-any.whl
+PACKAGE_PATH := dist/ovmsclient-2023.0-py3-none-any.whl
TEST_TYPE := BASIC
.PHONY: build-deps build-package build test clean style
diff --git a/client/python/ovmsclient/lib/README.md b/client/python/ovmsclient/lib/README.md
index d1f87dbc68..7e09b663dd 100644
--- a/client/python/ovmsclient/lib/README.md
+++ b/client/python/ovmsclient/lib/README.md
@@ -44,7 +44,7 @@ Assuming you have TFS API built, you can use `make build-package` target to buil
**To install the package run:**
```bash
-pip3 install --force-reinstall --no-deps dist/ovmsclient-2022.3-py3-none-any.whl
+pip3 install --force-reinstall --no-deps dist/ovmsclient-2023.0-py3-none-any.whl
```
*Note*: For development purposes you may want to repeatedly reinstall the package.
@@ -61,7 +61,7 @@ make build-package
```
- `make test` - runs tests on `ovmsclient` package. By default the package located in `dist/` directory is used. To specify custom package path pass `PACKAGE_PATH` option like:
- `make test PACKAGE_PATH=/opt/packages/ovmsclient-2022.3-py3-none-any.whl`
+ `make test PACKAGE_PATH=/opt/packages/ovmsclient-2023.0-py3-none-any.whl`
```bash
make test
```
diff --git a/client/python/ovmsclient/lib/docs/grpc_client.md b/client/python/ovmsclient/lib/docs/grpc_client.md
index ba8b5f619d..1ce25a3cc1 100644
--- a/client/python/ovmsclient/lib/docs/grpc_client.md
+++ b/client/python/ovmsclient/lib/docs/grpc_client.md
@@ -182,7 +182,7 @@ Request prediction on provided inputs.
...
}
```
- Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`
+    Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as an array of `numpy.bytes_` dtype.
**Raises:**
@@ -202,12 +202,21 @@ Request prediction on provided inputs.
```python
import ovmsclient
client = ovmsclient.make_grpc_client("localhost:9000")
+
+# Numeric input
inputs = {"input": [1, 2, 3]}
# request prediction on specific model version, with timeout set to 2.5 seconds
results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5)
# request prediction on the latest model version
results = client.predict(inputs=inputs, model_name="model")
+# String input
+inputs = {"input": ["We have a really nice", "One, two, three,"]}
+results = client.predict(inputs=inputs, model_name="model")
+print(inputs["input"])
+# ['We have a really nice', 'One, two, three,']
+print(results)
+# [b'We have a really nice way' b'One, two, three, four']
```
---
diff --git a/client/python/ovmsclient/lib/docs/http_client.md b/client/python/ovmsclient/lib/docs/http_client.md
index 731afd3742..15d7f64259 100644
--- a/client/python/ovmsclient/lib/docs/http_client.md
+++ b/client/python/ovmsclient/lib/docs/http_client.md
@@ -180,7 +180,7 @@ Request prediction on provided inputs.
...
}
```
- Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`
+    Where `output_name` is a `string` and `prediction_result` is a `numpy ndarray`. Both strings and binary data are returned as an array of `numpy.bytes_` dtype.
**Raises:**
@@ -200,12 +200,21 @@ Request prediction on provided inputs.
```python
import ovmsclient
client = ovmsclient.make_http_client("localhost:9000")
+
+# Numeric input
inputs = {"input": [1, 2, 3]}
# request prediction on specific model version, with timeout set to 2.5 seconds
results = client.predict(inputs=inputs, model_name="model", model_version=1, timeout=2.5)
# request prediction on the latest model version
results = client.predict(inputs=inputs, model_name="model")
+# String input
+inputs = {"input": ["We have a really nice", "One, two, three,"]}
+results = client.predict(inputs=inputs, model_name="model")
+print(inputs["input"])
+# ['We have a really nice', 'One, two, three,']
+print(results)
+# [b'We have a really nice way' b'One, two, three, four']
```
---
diff --git a/client/python/ovmsclient/lib/docs/pypi_overview.md b/client/python/ovmsclient/lib/docs/pypi_overview.md
index 0578151125..232440a0ba 100644
--- a/client/python/ovmsclient/lib/docs/pypi_overview.md
+++ b/client/python/ovmsclient/lib/docs/pypi_overview.md
@@ -9,7 +9,7 @@ The `ovmsclient` package works both with OpenVINO™ Model Server and Tensor
The `ovmsclient` can replace `tensorflow-serving-api` package with reduced footprint and simplified interface.
-See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2022/3/client/python/ovmsclient/lib/docs/README.md) for usage details.
+See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/client/python/ovmsclient/lib/docs/README.md) for usage details.
## Usage example
@@ -38,4 +38,4 @@ results = client.predict(inputs=inputs, model_name="model")
```
-Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2022/3/client/python/ovmsclient/lib).
\ No newline at end of file
+Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2023/0/client/python/ovmsclient/lib).
\ No newline at end of file
diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py
index e1549a7ad4..b955be3161 100644
--- a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py
+++ b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/grpc/tensors.py
@@ -24,32 +24,51 @@
class TensorType(NamedTuple):
+ NumpyPrimaryType: np.dtype.type
TensorDtype: str
TensorProtoField: str
NP_TO_TENSOR_MAP = {
- np.float16: TensorType(TensorDtype=DataType.DT_HALF, TensorProtoField="half_val"),
- np.float32: TensorType(TensorDtype=DataType.DT_FLOAT, TensorProtoField="float_val"),
- np.float64: TensorType(TensorDtype=DataType.DT_DOUBLE, TensorProtoField="double_val"),
- np.int8: TensorType(TensorDtype=DataType.DT_INT8, TensorProtoField="int_val"),
- np.int16: TensorType(TensorDtype=DataType.DT_INT16, TensorProtoField="int_val"),
- np.int32: TensorType(TensorDtype=DataType.DT_INT32, TensorProtoField="int_val"),
- np.int64: TensorType(TensorDtype=DataType.DT_INT64, TensorProtoField="int64_val"),
- np.uint8: TensorType(TensorDtype=DataType.DT_UINT8, TensorProtoField="int_val"),
- np.uint16: TensorType(TensorDtype=DataType.DT_UINT16, TensorProtoField="int_val"),
- np.uint32: TensorType(TensorDtype=DataType.DT_UINT32, TensorProtoField="uint32_val"),
- np.uint64: TensorType(TensorDtype=DataType.DT_UINT64, TensorProtoField="uint64_val"),
- np.complex64: TensorType(TensorDtype=DataType.DT_COMPLEX64, TensorProtoField="scomplex_val"),
- np.complex128: TensorType(TensorDtype=DataType.DT_COMPLEX128, TensorProtoField="dcomplex_val"),
+ np.float16: TensorType(NumpyPrimaryType=np.float16, TensorDtype=DataType.DT_HALF,
+ TensorProtoField="half_val"),
+ np.float32: TensorType(NumpyPrimaryType=np.float32, TensorDtype=DataType.DT_FLOAT,
+ TensorProtoField="float_val"),
+ np.float64: TensorType(NumpyPrimaryType=np.float64, TensorDtype=DataType.DT_DOUBLE,
+ TensorProtoField="double_val"),
+ np.int8: TensorType(NumpyPrimaryType=np.int8, TensorDtype=DataType.DT_INT8,
+ TensorProtoField="int_val"),
+ np.int16: TensorType(NumpyPrimaryType=np.int16, TensorDtype=DataType.DT_INT16,
+ TensorProtoField="int_val"),
+ np.int32: TensorType(NumpyPrimaryType=np.int32, TensorDtype=DataType.DT_INT32,
+ TensorProtoField="int_val"),
+ np.int64: TensorType(NumpyPrimaryType=np.int64, TensorDtype=DataType.DT_INT64,
+ TensorProtoField="int64_val"),
+ np.uint8: TensorType(NumpyPrimaryType=np.uint8, TensorDtype=DataType.DT_UINT8,
+ TensorProtoField="int_val"),
+ np.uint16: TensorType(NumpyPrimaryType=np.uint16, TensorDtype=DataType.DT_UINT16,
+ TensorProtoField="int_val"),
+ np.uint32: TensorType(NumpyPrimaryType=np.uint32, TensorDtype=DataType.DT_UINT32,
+ TensorProtoField="uint32_val"),
+ np.uint64: TensorType(NumpyPrimaryType=np.uint64, TensorDtype=DataType.DT_UINT64,
+ TensorProtoField="uint64_val"),
+ np.complex64: TensorType(NumpyPrimaryType=np.complex64, TensorDtype=DataType.DT_COMPLEX64,
+ TensorProtoField="scomplex_val"),
+ np.complex128: TensorType(NumpyPrimaryType=np.complex128, TensorDtype=DataType.DT_COMPLEX128,
+ TensorProtoField="dcomplex_val"),
# Standard Python bool and np.bool_ replace deprecated np.bool type
- np.bool_: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"),
- bool: TensorType(TensorDtype=DataType.DT_BOOL, TensorProtoField="bool_val"),
- np.bytes_: TensorType(TensorDtype=DataType.DT_STRING, TensorProtoField="string_val")
+ bool: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL,
+ TensorProtoField="bool_val"),
+ np.bool_: TensorType(NumpyPrimaryType=np.bool_, TensorDtype=DataType.DT_BOOL,
+ TensorProtoField="bool_val"),
+ np.str_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING,
+ TensorProtoField="string_val"),
+ np.bytes_: TensorType(NumpyPrimaryType=np.bytes_, TensorDtype=DataType.DT_STRING,
+ TensorProtoField="string_val"),
}
-TENSOR_TO_NP_MAP = {v.TensorDtype: k for k, v in NP_TO_TENSOR_MAP.items()}
+TENSOR_TO_NP_MAP = {v.TensorDtype: v.NumpyPrimaryType for v in NP_TO_TENSOR_MAP.values()}
TENSOR_DTYPE_TO_PROTOFIELD = {v.TensorDtype: v.TensorProtoField for v in NP_TO_TENSOR_MAP.values()}
@@ -79,8 +98,12 @@ def _cast_bytes_to_dtype(values, dtype):
raise ValueError(f'could not cast bytes to {dtype}. {e_info}')
-def _is_bytes_shape_valid(inferred_shape, tensor_values):
- return (len(inferred_shape) > 1 or (len(tensor_values.shape) > 1 and inferred_shape == []))
+# Function used with numpy.vectorize()
+def _encode_string_to_bytes(tensor_value, encoding="UTF-8"):
+ return tensor_value.encode(encoding)
+
+
+_encode_strings_to_bytes = np.vectorize(_encode_string_to_bytes)
def _check_if_array_homogeneous(tensor_values):
@@ -185,13 +208,18 @@ def make_tensor_proto(values, dtype=None, shape=None):
else:
tensor_values = _cast_ndarray_to_dtype(tensor_values, np_dtype)
- if dtype == DataType.DT_STRING and _is_bytes_shape_valid(inferred_shape, tensor_values):
- raise ValueError("bytes values with dtype DT_STRING must be in shape [N]")
- elif inferred_shape == []:
+ if inferred_shape == []:
inferred_shape = list(tensor_values.shape)
elif inferred_shape != list(tensor_values.shape):
tensor_values = tensor_values.reshape(inferred_shape)
+    # For string or binary image inputs, flatten the array to 1-D
+ if dtype == DataType.DT_STRING:
+ tensor_values = np.ravel(tensor_values)
+ # Encode strings
+ if tensor_values.dtype.type == np.str_:
+ tensor_values = _encode_strings_to_bytes(tensor_values)
+
dims = []
for d in inferred_shape:
dims.append(TensorShapeProto.Dim(size=d))
@@ -274,7 +302,7 @@ def make_ndarray(tensor_proto):
elif np_dtype == np.complex128:
it = iter(tensor_proto.dcomplex_val)
values = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=np_dtype)
- elif np_dtype == np.bool_ or np_dtype == bool:
+ elif np_dtype == np.bool_:
values = np.fromiter(tensor_proto.bool_val, dtype=np_dtype)
else:
raise TypeError("Unsupported tensor type: %s" % tensor_proto.dtype)
diff --git a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py
index 35af70d609..1d016eb4a0 100644
--- a/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py
+++ b/client/python/ovmsclient/lib/ovmsclient/tfs_compat/http/requests.py
@@ -20,7 +20,6 @@
from ovmsclient.tfs_compat.base.requests import (PredictRequest, ModelMetadataRequest,
ModelStatusRequest, _check_model_spec)
from ovmsclient.tfs_compat.grpc.tensors import (NP_TO_TENSOR_MAP, DataType,
- _is_bytes_shape_valid,
_check_if_array_homogeneous)
@@ -167,6 +166,14 @@ def make_status_request(model_name, model_version=0):
return HttpModelStatusRequest(model_name, model_version)
+# Function used with numpy.vectorize()
+def _decode_bytes_to_string(tensor_value, encoding="UTF-8"):
+ return tensor_value.decode(encoding)
+
+
+_decode_bytes_to_strings = np.vectorize(_decode_bytes_to_string)
+
+
def _parse_input_data(values):
# create numpy ndarray from values and find its dtype if not provided
@@ -187,14 +194,14 @@ def _parse_input_data(values):
else:
raise TypeError("provided values type is not valid")
- if dtype == DataType.DT_STRING and _is_bytes_shape_valid(tensor_values.shape, tensor_values):
- raise ValueError("bytes values with dtype DT_STRING must be in shape [N]")
-
- if dtype == DataType.DT_STRING:
- b64_values = []
- for value in tensor_values:
- b64_value = base64.b64encode(value).decode('utf-8')
- b64_values.append({"b64": b64_value})
- return b64_values
- else:
- return tensor_values.tolist()
+ if dtype == DataType.DT_STRING and tensor_values.dtype.type == np.bytes_:
+ if len(tensor_values.shape) != 1:
+            # Bytes in a multidimensional shape are decoded to UTF-8 strings and sent as regular input
+ tensor_values = _decode_bytes_to_strings(tensor_values)
+ else:
+ b64_values = []
+ for value in tensor_values:
+ b64_value = base64.b64encode(value).decode('utf-8')
+ b64_values.append({"b64": b64_value})
+ return b64_values
+ return tensor_values.tolist()
diff --git a/client/python/ovmsclient/lib/scripts/build_tfs_api.sh b/client/python/ovmsclient/lib/scripts/build_tfs_api.sh
index 6eceee3102..451ae08372 100755
--- a/client/python/ovmsclient/lib/scripts/build_tfs_api.sh
+++ b/client/python/ovmsclient/lib/scripts/build_tfs_api.sh
@@ -36,6 +36,7 @@ mkdir compiled_protos
cp -R tf/tensorflow ovmsclient/tfs_compat/protos/tensorflow
cp -R tfs/tensorflow_serving ovmsclient/tfs_compat/protos/tensorflow_serving
find ovmsclient/tfs_compat/protos -name "*.proto" -exec sed -i -E 's/import "tensorflow/import "ovmsclient\/tfs_compat\/protos\/tensorflow/g' {} \;
+python3 scripts/rename_proto_package.py
protoc --proto_path=$PWD --python_out=$PWD/compiled_protos \
$PWD/ovmsclient/tfs_compat/protos/tensorflow/core/framework/*.proto \
diff --git a/client/python/ovmsclient/lib/scripts/rename_proto_package.py b/client/python/ovmsclient/lib/scripts/rename_proto_package.py
new file mode 100644
index 0000000000..363f62b270
--- /dev/null
+++ b/client/python/ovmsclient/lib/scripts/rename_proto_package.py
@@ -0,0 +1,44 @@
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+base_path = "ovmsclient/tfs_compat/protos"
+
+# Find all proto files
+proto_paths = []
+for root, subdirs, files in os.walk(base_path):
+ for file in files:
+ if file.endswith(".proto"):
+ file_path = os.path.join(root, file)
+ proto_paths.append(file_path)
+
+# Replace the package name (if defined) in all proto files
+replacement_map = {
+ "package tensorflow": "package ovmsclient",
+ " tensorflow.": " ovmsclient.",
+ " .tensorflow.": " .ovmsclient."
+}
+
+for proto_path in proto_paths:
+    with open(proto_path, 'rt') as file:
+ filedata = file.read()
+
+ for to_replace, replace_with in replacement_map.items():
+ filedata = filedata.replace(to_replace, replace_with)
+
+ with open(proto_path, 'wt') as file:
+ file.write(filedata)
diff --git a/client/python/ovmsclient/lib/setup.py b/client/python/ovmsclient/lib/setup.py
index b406a9c5e2..920c7f132e 100644
--- a/client/python/ovmsclient/lib/setup.py
+++ b/client/python/ovmsclient/lib/setup.py
@@ -49,7 +49,7 @@ def run(self):
setuptools.setup(
name="ovmsclient",
- version="2022.3",
+ version="2023.0",
license="Apache License 2.0",
author="Intel Corporation",
author_email="ovms.engineering@intel.com",
diff --git a/client/python/ovmsclient/lib/tests/config.py b/client/python/ovmsclient/lib/tests/config.py
index 6a0603cb73..a204ac5dd0 100644
--- a/client/python/ovmsclient/lib/tests/config.py
+++ b/client/python/ovmsclient/lib/tests/config.py
@@ -15,6 +15,7 @@
#
from enum import IntEnum
+import numpy as np
class CallCount(IntEnum):
@@ -337,7 +338,7 @@ class CallCount(IntEnum):
TypeError, "inputs keys type should be str, but found int"
),
(
- [{"input": [1, 2, "three"]}, "model_name", 1, 1],
+ [{"input": np.array(b'1', dtype=np.dtype(np.void, 10))}, "model_name", 1, 1],
TypeError, "provided values type is not valid"
),
(
diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py
index a12d8452ef..5a809e6a4b 100644
--- a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py
+++ b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/config.py
@@ -176,14 +176,6 @@
"input1": (1, 2, 3)
}, 'model_name', 0, TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
- ({
- "input1": [
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ]
- }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
]
# (inputs_dict,
@@ -222,7 +214,9 @@
TensorShapeProto.Dim(size=3)]),
tensor_content=array([1, 2, 3, 4, 5, 6]).tobytes()),
"input2": 5.0,
- "input3": bytes([1, 2, 3])
+ "input3": bytes([1, 2, 3]),
+ "input4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]],
+ "input5": [["list", "of", "strings"]],
}, {
"input2": {
"field": "float_val",
@@ -235,7 +229,21 @@
"shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]),
"dtype": DataType.DT_STRING,
'value': [bytes([1, 2, 3])]
- }
+ },
+ "input4": {
+ "field": "string_val",
+ "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=2),
+ TensorShapeProto.Dim(size=2)]),
+ "dtype": DataType.DT_STRING,
+ 'value': [bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3]), bytes([1, 2, 3])]
+ },
+ "input5": {
+ "field": "string_val",
+ "shape": TensorShapeProto(dim=[TensorShapeProto.Dim(size=1),
+ TensorShapeProto.Dim(size=3)]),
+ "dtype": DataType.DT_STRING,
+ 'value': [b'list', b'of', b'strings']
+ },
}, 'model_name', 0),
({
@@ -274,9 +282,20 @@
"2": TensorProto(dtype=DataType.DT_STRING,
tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]),
string_val=[bytes([1, 2, 3])]),
+ "3": TensorProto(dtype=DataType.DT_STRING,
+ tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=1),
+ TensorShapeProto.Dim(size=3)]),
+ string_val=[b'list', b'of', b'strings']),
+ "4": TensorProto(dtype=DataType.DT_STRING,
+ tensor_shape=TensorShapeProto(dim=[TensorShapeProto.Dim(size=2),
+ TensorShapeProto.Dim(size=2)]),
+ string_val=[bytes([1, 2, 3]), bytes([1, 2, 3]),
+ bytes([1, 2, 3]), bytes([1, 2, 3])]),
}, "model_name", 0, {
"1463": [bytes([1, 2, 3]), bytes([4, 5])],
- "2": [bytes([1, 2, 3])]
+ "2": [bytes([1, 2, 3])],
+ "3": [[b'list', b'of', b'strings']],
+ "4": [[bytes([1, 2, 3]), bytes([1, 2, 3])], [bytes([1, 2, 3]), bytes([1, 2, 3])]]
}),
]
diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py
index fe881fde03..30aca1b11d 100644
--- a/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py
+++ b/client/python/ovmsclient/lib/tests/tfs_compat_grpc/test_tensors.py
@@ -348,12 +348,22 @@ def test_make_tensor_proto_valid_scalar(params, expected_shape, expected_dtype,
TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)]), DataType.DT_STRING,
"string_val"
),
+ ({"values": [[bytes([0x13, 0x00, 0x00, 0x00, 0x08]), bytes([0x13, 0x00, 0x00, 0x00, 0x08])]],
+ "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=2)]),
+ DataType.DT_STRING,
+ "string_val"
+ ),
])
-def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype, expected_field):
+def test_make_tensor_proto_valid_binary(params, expected_shape, expected_dtype, expected_field):
tensor_proto = make_tensor_proto(**params)
if expected_field == "string_val":
- assert tensor_proto.__getattribute__(expected_field) == [params["values"]]
+ if type(params["values"]) is not list:
+ assert tensor_proto.__getattribute__(expected_field) == [params["values"]]
+ else:
+ assert (tensor_proto.__getattribute__(expected_field)
+ == np.ravel(params["values"]).tolist())
else:
assert (tensor_proto.__getattribute__(expected_field)
== np.frombuffer(params["values"],
@@ -362,6 +372,31 @@ def test_make_tensor_proto_valid_string(params, expected_shape, expected_dtype,
assert tensor_proto.tensor_shape == expected_shape
+@pytest.mark.parametrize("params, expected_shape", [
+ ({"values": "string", "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1)])
+ ),
+ ({"values": ["list", "of", "strings"], "shape": [3], "dtype": DataType.DT_STRING},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)])
+ ),
+ ({"values": [["nested", "list", "of", "strings"]]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=1), TensorShapeProto.Dim(size=4)])
+ ),
+    # Upon numpy array creation the values will be cast to the numpy.str_ data type
+ ({"values": [1, 2, "three"]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=3)])
+ ),
+ ({"values": [[1, 2], [3, "four"]]},
+ TensorShapeProto(dim=[TensorShapeProto.Dim(size=2), TensorShapeProto.Dim(size=2)])
+ ),
+])
+def test_make_tensor_proto_valid_string(params, expected_shape):
+ tensor_proto = make_tensor_proto(**params)
+ assert tensor_proto.string_val == np.ravel(params["values"]).astype(np.bytes_).tolist()
+ assert tensor_proto.dtype == DataType.DT_STRING
+ assert tensor_proto.tensor_shape == expected_shape
+
+
def test_make_tensor_proto_valid_string_to_float_dtype():
values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
tensor_proto = make_tensor_proto(values=values, shape=[3], dtype=DataType.DT_INT16)
@@ -625,40 +660,6 @@ def test_make_tensor_proto_invalid_values_type():
assert str(exception) == "values type should be (list, np.ndarray, scalar), but is tuple"
-def test_make_tensor_proto_invalid_string_2D_array():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=[[values, values], [values, values]],
- shape=None, dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
-
-
-def test_make_tensor_proto_invalid_string_reshape():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=[6], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "cannot reshape array of size 1 into shape (6,)"
-
-
-def test_make_tensor_proto_invalid_string_reshape_2():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=[2, 3], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
-
-
-def test_make_tensor_proto_invalid_string_2D_array_with_shape():
- values = bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=[[values, values], [values, values]],
- shape=[2, 2], dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
-
-
def test_make_tensor_proto_invalid_int_reshape():
values = [1, 2, 3]
with pytest.raises(ValueError) as exception_info:
@@ -673,11 +674,3 @@ def test_make_tensor_proto_invalid_empty_list_of_empty_lists_reshape():
make_tensor_proto(values=values, shape=[4, 2], dtype=DataType.DT_INT8)
exception = exception_info.value
assert str(exception) == "cannot reshape array of size 0 into shape (4,2)"
-
-
-def test_make_tensor_proto_invalid_dtype_provided():
- values = [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]]
- with pytest.raises(ValueError) as exception_info:
- make_tensor_proto(values=values, shape=None, dtype=DataType.DT_STRING)
- exception = exception_info.value
- assert str(exception) == "bytes values with dtype DT_STRING must be in shape [N]"
diff --git a/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py b/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py
index f31031ba62..66d43e52dd 100644
--- a/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py
+++ b/client/python/ovmsclient/lib/tests/tfs_compat_http/config.py
@@ -20,6 +20,7 @@
import requests
from http import HTTPStatus
from numpy import array, int32, float32, float128
+import numpy as np
from ovmsclient.tfs_compat.protos.tensorflow.core.protobuf.error_codes_pb2 import Code as ErrorCode
from ovmsclient.tfs_compat.base.errors import InvalidInputError, ModelNotFoundError
@@ -57,14 +58,6 @@
"input1": (1, 2, 3)
}, 'model_name', 0, TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
- ({
- "input1": [
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ]
- }, 'model_name', 0, ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
]
# (inputs_dict,
@@ -85,11 +78,13 @@
({
"input1": 5.0,
- "input2": bytes([1, 2, 3])
+ "input2": bytes([1, 2, 3]),
+ "input3": ["list", "of", "strings"]
}, json.dumps({
"inputs": {
"input1": [5.0],
- "input2": [{"b64": "AQID"}]
+ "input2": [{"b64": "AQID"}],
+ "input3": ["list", "of", "strings"]
}
}), 'model_name', 0),
@@ -119,7 +114,10 @@
([1, 2, 3.0], [1.0, 2.0, 3.0]),
([bytes([1, 2, 3]), bytes([4, 5, 6]), bytes([7, 8, 9])],
- [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}])
+ [{"b64": "AQID"}, {"b64": "BAUG"}, {"b64": "BwgJ"}]),
+
+ ([[bytes([111, 118]), bytes([109, 115])], [bytes([111, 118]), bytes([109, 115])]],
+ [["ov", "ms"], ["ov", "ms"]])
]
# (inputs_dict,
@@ -136,23 +134,13 @@
"The requested array has an inhomogeneous shape after 2 dimensions. "
"The detected shape was (3, 1) + inhomogeneous part.")),
- ([1, 2, 3, "str"],
- TypeError, "provided values type is not valid"),
-
- ([[1, 2], [3, 4], ["five", 6]],
+ (np.array(b'1', dtype=np.dtype(np.void, 10)),
TypeError, "provided values type is not valid"),
(float128(2.5), TypeError, "provided values type is not valid"),
((1, 2, 3), TypeError,
"values type should be (list, np.ndarray, scalar), but is tuple"),
-
- ([
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])],
- [bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00]),
- bytes([0x13, 0x00, 0x00, 0x00, 0x08, 0x00])]
- ], ValueError, "bytes values with dtype DT_STRING must be in shape [N]"),
]
# (config_dict,
diff --git a/demos/README.md b/demos/README.md
index aa7b285cc7..ace1a21a46 100644
--- a/demos/README.md
+++ b/demos/README.md
@@ -15,6 +15,8 @@
ovms_demo_single_face_analysis_pipeline
ovms_demo_multi_faces_analysis_pipeline
ovms_docs_demo_ensemble
+ ovms_docs_demo_mediapipe_image_classification
+ ovms_docs_demo_mediapipe_multi_model
ovms_docs_image_classification
ovms_demo_using_onnx_model
ovms_demo_tf_classification
@@ -23,6 +25,7 @@
ovms_demo_real_time_stream_analysis
ovms_demo_bert
ovms_demo_gptj_causal_lm
+ ovms_demo_universal-sentence-encoder
ovms_demo_speech_recognition
ovms_demo_benchmark_client
@@ -48,6 +51,7 @@ OpenVINO Model Server demos have been created to showcase the usage of the model
|[Real Time Stream Analysis](real_time_stream_analysis/python/README.md)| Analyze RTSP video stream in real time with generic application template for custom pre and post processing routines as well as simple results visualizer for displaying predictions in the browser. |
|[Natural Language Processing with BERT](bert_question_answering/python/README.md)|Provide a knowledge source and a query and use BERT model for question answering use case via gRPC API. This demo uses dynamic shape feature. |
|[GPT-J Causal Language Modeling](gptj_causal_lm/python/README.md)|Write start of the sentence and let GPT-J continue via gRPC API. This demo uses dynamic shape feature. |
+|[Using inputs data in string format with universal-sentence-encoder model](universal-sentence-encoder/README.md)| Handling AI model with text as the model input. |
|[Speech Recognition on Kaldi Model](speech_recognition_with_kaldi_model/python/README.md)|Run inference on a speech sample and use Kaldi model to perform speech recognition via gRPC API. This demo uses [stateful model](../docs/stateful_models.md). |
|[Benchmark App](benchmark/python/README.md)|Generate traffic and measure performance of the model served in OpenVINO Model Server.|
|[Face Blur Pipeline](face_blur/python/README.md)|Detect faces and blur image using a pipeline of object detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [face_blur custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/src/custom_nodes/face_blur). |
diff --git a/demos/benchmark/python/client.py b/demos/benchmark/python/client.py
index b6c7e1540e..b3e834a86f 100644
--- a/demos/benchmark/python/client.py
+++ b/demos/benchmark/python/client.py
@@ -363,6 +363,8 @@ def __fix_shape_and_type(self, input_name):
dtype = tensorflow.dtypes.int32
elif dtype == self.DTYPE_INT_64:
dtype = tensorflow.dtypes.int64
+ elif dtype == self.DTYPE_UINT_8:
+ dtype = tensorflow.dtypes.uint8
else: raise ValueError(f"not supported type: {dtype}")
return shape, dtype
diff --git a/demos/bert_question_answering/python/Dockerfile b/demos/bert_question_answering/python/Dockerfile
index da090d2612..b96faef4c9 100644
--- a/demos/bert_question_answering/python/Dockerfile
+++ b/demos/bert_question_answering/python/Dockerfile
@@ -22,4 +22,4 @@ COPY bert_question_answering.py tokens_bert.py html_reader.py requirements.txt .
ADD https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/bert-small-uncased-whole-word-masking-squad-int8-0002/vocab.txt .
RUN pip3 install --upgrade pip && pip3 install --no-cache-dir -r requirements.txt
-ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/wiki/BERT_(language_model)", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"]
+ENTRYPOINT ["python3", "bert_question_answering.py", "-v", "vocab.txt", "-i", "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098", "--question", "what is bert", "--grpc_port", "9000", "--input_names", "input_ids,attention_mask,token_type_ids,position_ids"]
diff --git a/demos/bert_question_answering/python/README.md b/demos/bert_question_answering/python/README.md
index 17b7f46c15..e9fa9905d3 100644
--- a/demos/bert_question_answering/python/README.md
+++ b/demos/bert_question_answering/python/README.md
@@ -31,30 +31,30 @@ docker run -it --network host -e no_proxy=localhost bert-client:latest --grpc_ad
Docker image with BERT client by default start the container with a command:
```
-python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/wiki/BERT_(language_model)" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids
+python bert_question_answering.py -v vocab.txt -i "https://en.wikipedia.org/w/index.php?title=BERT_(language_model)&oldid=1148859098" --question "what is bert" --grpc_port 9000 --input_names input_ids,attention_mask,token_type_ids,position_ids
```
You can change the entrypoint to adjust to different parameters
Example of the output snippet:
```bash
question: what is bert
-[ INFO ] Sequence of length 395 is processed with 1.85 requests/sec (0.54 sec per request)
-[ INFO ] Sequence of length 368 is processed with 2.74 requests/sec (0.36 sec per request)
-[ INFO ] Sequence of length 32 is processed with 16.51 requests/sec (0.061 sec per request)
+[ INFO ] Sequence of length 418 is processed with 9.91 requests/sec (0.1 sec per request)
+[ INFO ] Sequence of length 305 is processed with 12.08 requests/sec (0.083 sec per request)
+[ INFO ] Sequence of length 349 is processed with 12.14 requests/sec (0.082 sec per request)
+[ INFO ] Sequence of length 110 is processed with 17.98 requests/sec (0.056 sec per request)
[ INFO ] The performance below is reported only for reference purposes, please use the benchmark_app tool (part of the OpenVINO samples) for any actual measurements.
-[ INFO ] 3 requests were processed in 1.00sec (0.33sec per request)
-[ INFO ] ---answer: 0.26 deeply bidirectional, unsupervised language representation
-[ INFO ] When BERT was published, it achieved state-of-the-art performance on a number of natural language understanding tasks:[1]
-The reasons for BERT's state-of-the-art performance on these natural language understanding tasks are not yet well understood.[8][9] Current research has focused on investigating the relationship behind BERT's output as a result of carefully chosen input sequences,[10][11] analysis of internal vector representations through probing classifiers,[12][13] and the relationships represented by attention weights.[8][9]
-BERT has its origins from pre-training contextual representations including semi-supervised sequence learning,[14] generative pre-training, ELMo,[15] and ULMFit.[16] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence.
-On October 25, 2019, Google Search announced that they had started applying BERT models for English language search queries within the US.[17] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[18] In October 2020, almost every single English-based query was processed by BERT.[19]
-
-[ INFO ] ---answer: 0.22 Bidirectional Encoder Representations from Transformers
-[ INFO ] Bidirectional Encoder Representations from Transformers (BERT) is a transformer-based machine learning technique for natural language processing (NLP) pre-training developed by Google. BERT was created and published in 2018 by Jacob Devlin and his colleagues from Google.[1][2] In 2019, Google announced that it had begun leveraging BERT in its search engine, and by late 2020 it was using BERT in almost every English-language query. A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in NLP experiments", counting over 150 research publications analyzing and improving the model.[3]
-The original English-language BERT has two models:[1] (1) the BERTBASE: 12 encoders with 12 bidirectional self-attention heads, and (2) the BERTLARGE: 24 encoders with 16 bidirectional self-attention heads. Both models are pre-trained from unlabeled data extracted from the BooksCorpus[4] with 800M words and English Wikipedia with 2,500M words.
-BERT is at its core a transformer language model with a variable number of encoder layers and self-attention heads. The architecture is "almost identical" to the original transformer implementation in Vaswani et al. (2017).[5]
-BERT was pretrained on two tasks: language modeling (15% of tokens were masked and BERT was trained to predict them from context) and next sentence prediction (BERT was trained to predict if a chosen next sentence was probable or not given the first sentence). As a result of the training process, BERT learns contextual embeddings for words. After pretraining, which is computationally expensive, BERT can be finetuned with fewer resources on smaller datasets to optimize its performance on specific tasks.[1][6]
-
-[ INFO ] ---answer: 0.36 The research paper describing BERT
-[ INFO ] The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[20]
+[ INFO ] 4 requests were processed in 0.34sec (0.086sec per request)
+[ INFO ] ---answer: 0.35 Bidirectional Encoder Representations from Transformers
+[ INFO ] This is the current revision of this page, as edited by Mandarax (talk | contribs) at 19:01, 8 April 2023 (Correct capitalization). The present address (URL) is a permanent link to this version.Bidirectional Encoder Representations from Transformers (BERT) is a family of masked-language models introduced in 2018 by researchers at Google.[1][2] A 2020 literature survey concluded that "in a little over a year, BERT has become a ubiquitous baseline in Natural Language Processing (NLP) experiments counting over 150 research publications analyzing and improving the model."[3]
+BERT was originally implemented in the English language at two model sizes:[1] (1) BERTBASE: 12 encoders with 12 bidirectional self-attention heads totaling 110 million parameters, and (2) BERTLARGE: 24 encoders with 16 bidirectional self-attention heads totaling 340 million parameters. Both models were pre-trained on the Toronto BookCorpus[4] (800M words) and English Wikipedia (2,500M words).
+BERT is based on the transformer architecture. Specifically, BERT is composed of Transformer encoder layers.
+BERT was pre-trained simultaneously on two tasks: language modeling (15% of tokens were masked, and the training objective was to predict the original token given its context) and next sentence prediction (the training objective was to classify if two spans of text appeared sequentially in the training corpus).[5] As a result of this training process, BERT learns latent representations of words and sentences in context. After pre-training, BERT can be fine-tuned with fewer resources on smaller datasets to optimize its performance on specific tasks such as NLP tasks (language inference, text classification) and sequence-to-sequence based language generation tasks (question-answering, conversational response generation).[1][6] The pre-training stage is significantly more computationally expensive than fine-tuning.
+
+[ INFO ] ---answer: 0.14 deeply bidirectional, unsupervised language representation
+[ INFO ] In contrast to deep learning neural networks which require very large amounts of data, BERT has already been pre-trained which means that it has learnt the representations of the words and sentences as well as the underlying semantic relations that they are connected with. BERT can then be fine-tuned on smaller datasets for specific tasks such as sentiment classification. The pre-trained models are chosen according to the content of the given dataset one uses but also the goal of the task. For example, if the task is a sentiment classification task on financial data, a pre-trained model for the analysis of sentiment of financial text should be chosen. The weights of the original pre-trained models were released on GitHub.[16]
+BERT was originally published by Google researchers Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. The design has its origins from pre-training contextual representations, including semi-supervised sequence learning,[17] generative pre-training, ELMo,[18] and ULMFit.[19] Unlike previous models, BERT is a deeply bidirectional, unsupervised language representation, pre-trained using only a plain text corpus. Context-free models such as word2vec or GloVe generate a single word embedding representation for each word in the vocabulary, where BERT takes into account the context for each occurrence of a given word. For instance, whereas the vector for "running" will have the same word2vec vector representation for both of its occurrences in the sentences "He is running a company" and "He is running a marathon", BERT will provide a contextualized embedding that will be different according to the sentence.
+
+[ INFO ] ---answer: 0.12 BERT models for English language search queries within the US
+[ INFO ] On October 25, 2019, Google announced that they had started applying BERT models for English language search queries within the US.[20] On December 9, 2019, it was reported that BERT had been adopted by Google Search for over 70 languages.[21] In October 2020, almost every single English-based query was processed by a BERT model.[22]
+The research paper describing BERT won the Best Long Paper Award at the 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (NAACL).[23]
```
diff --git a/demos/bert_question_answering/python/bert_question_answering.py b/demos/bert_question_answering/python/bert_question_answering.py
index 0ddc7ee13a..4bb294e043 100644
--- a/demos/bert_question_answering/python/bert_question_answering.py
+++ b/demos/bert_question_answering/python/bert_question_answering.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2021 Intel Corporation
+# Copyright (c) 2021-2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,11 +23,8 @@
from tokens_bert import text_to_tokens, load_vocab_file, Token
from html_reader import get_paragraphs
-import grpc
import numpy as np
-from tensorflow import make_tensor_proto, make_ndarray
-from tensorflow_serving.apis import predict_pb2
-from tensorflow_serving.apis import prediction_service_pb2_grpc
+import ovmsclient
class ConcatenatedParagraph():
def __init__(self, text="", tokens=[]):
@@ -102,8 +99,7 @@ def main():
args = build_argparser().parse_args()
# create grpc connection
- channel = grpc.insecure_channel("{}:{}".format(args.grpc_address,args.grpc_port))
- stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+ client = ovmsclient.make_grpc_client("{}:{}".format(args.grpc_address,args.grpc_port))
if args.colors:
COLOR_RED = "\033[91m"
@@ -183,23 +179,9 @@ def main():
if len(input_names)>3:
inputs[input_names[3]] = np.arange(input_ids_length, dtype=np.int64)[None,:]
-
- #print("inputs:",inputs)
-
- # create grpc prediction request
- request = predict_pb2.PredictRequest()
- request.model_spec.name = args.model_name
- for inp_name in inputs:
- request.inputs[inp_name].CopyFrom(make_tensor_proto(inputs[inp_name], shape=(inputs[inp_name].shape)))
-
t_start = time.perf_counter()
- result = stub.Predict(request, 10.0) # result includes a dictionary with all model outputs
+ res = client.predict(inputs, args.model_name, timeout=10.0)
t_end = time.perf_counter()
- #print("\nresult:", result)
- res = {}
- for out_name in output_names:
- # print("out_name:",out_name)
- res[out_name] = make_ndarray(result.outputs[out_name])
t_count += 1
log.info("Sequence of length {} is processed with {:0.2f} requests/sec ({:0.2} sec per request)".format(
diff --git a/demos/bert_question_answering/python/requirements.txt b/demos/bert_question_answering/python/requirements.txt
index a553919019..0deafd1de3 100644
--- a/demos/bert_question_answering/python/requirements.txt
+++ b/demos/bert_question_answering/python/requirements.txt
@@ -1,2 +1,2 @@
-tensorflow-serving-api==2.11.0
+ovmsclient
validators
diff --git a/demos/common/cpp/.bazelrc b/demos/common/cpp/.bazelrc
index 40ca1e660c..bf1c4f30f3 100644
--- a/demos/common/cpp/.bazelrc
+++ b/demos/common/cpp/.bazelrc
@@ -62,6 +62,10 @@ build --cxxopt=-fno-strict-overflow
build --cxxopt=-fno-delete-null-pointer-checks
build --cxxopt=-fwrapv
build --cxxopt=-fstack-protector
+build --cxxopt=-fstack-clash-protection
+build --cxxopt=-Wformat
+build --cxxopt=-Wformat-security
+build --cxxopt=-Werror=format-security
# Adding "--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0" creates parity with TF
# compilation options. It also addresses memory use due to
diff --git a/demos/common/cpp/Dockerfile b/demos/common/cpp/Dockerfile
index bb0e47805a..dcb04fd8d6 100644
--- a/demos/common/cpp/Dockerfile
+++ b/demos/common/cpp/Dockerfile
@@ -101,7 +101,7 @@ RUN bazel build \
@com_github_grpc_grpc//:grpc++ \
@com_google_protobuf//:protobuf_lite \
@org_tensorflow//tensorflow/core:lib \
- @opencv//:opencv
+ @linux_opencv//:opencv
COPY src/ /build/src/
diff --git a/demos/common/cpp/WORKSPACE b/demos/common/cpp/WORKSPACE
index 3479907a50..69bea5d63c 100644
--- a/demos/common/cpp/WORKSPACE
+++ b/demos/common/cpp/WORKSPACE
@@ -67,7 +67,7 @@ grpc_extra_deps()
##################### OPEN CV ######################
# OPENCV DEFINITION FOR BUILDING FROM BINARY RELEASE: ##########################
new_local_repository(
- name = "opencv",
+ name = "linux_opencv",
build_file = "@//third_party/opencv:BUILD",
path = "/opt/opencv",
)
diff --git a/demos/common/cpp/src/BUILD b/demos/common/cpp/src/BUILD
index cf52e6d471..04b4779d06 100644
--- a/demos/common/cpp/src/BUILD
+++ b/demos/common/cpp/src/BUILD
@@ -41,7 +41,7 @@ cc_binary(
"@com_google_protobuf//:protobuf_lite",
"@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:lib",
- "@opencv//:opencv",
+ "@linux_opencv//:opencv",
]
)
@@ -57,7 +57,7 @@ cc_binary(
"@com_google_protobuf//:protobuf_lite",
"@org_tensorflow//tensorflow/core:framework",
"@org_tensorflow//tensorflow/core:lib",
- "@opencv//:opencv",
+ "@linux_opencv//:opencv",
]
)
diff --git a/demos/gptj_causal_lm/python/.gitignore b/demos/gptj_causal_lm/python/.gitignore
index 5f48824542..b792e9c0e4 100644
--- a/demos/gptj_causal_lm/python/.gitignore
+++ b/demos/gptj_causal_lm/python/.gitignore
@@ -1,3 +1,4 @@
.venv/
local-pt-checkpoint/
-onnx/
\ No newline at end of file
+onnx/
+workspace/
diff --git a/demos/gptj_causal_lm/python/Makefile b/demos/gptj_causal_lm/python/Makefile
new file mode 100644
index 0000000000..636d6eabe8
--- /dev/null
+++ b/demos/gptj_causal_lm/python/Makefile
@@ -0,0 +1,38 @@
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BLINGFIRE_SHA=5089d31914cbed7a24589e753bd6cd362a377fbb
+
+.PHONY: setup
+
+default: setup
+
+setup:
+# Build custom node
+ cd ../../../src/custom_nodes/tokenizer && \
+ make BASE_OS=ubuntu
+ mkdir -p workspace/lib && \
+ cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libdetokenizer.so workspace/lib/libdetokenizer.so && \
+ cp ../../../src/custom_nodes/tokenizer/lib/ubuntu/libtokenizer.so workspace/lib/libtokenizer.so
+# Prepare tokenization models
+ mkdir -p workspace/tokenizers
+ wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.bin -O workspace/tokenizers/gpt2.bin
+ wget https://github.com/microsoft/BlingFire/raw/${BLINGFIRE_SHA}/ldbsrc/ldb/gpt2.i2w -O workspace/tokenizers/gpt2.i2w
+# Copy configuration file to workspace directory
+ cp config.json workspace/.
+
+clean:
+ @rm -rf workspace
\ No newline at end of file
diff --git a/demos/gptj_causal_lm/python/README.md b/demos/gptj_causal_lm/python/README.md
index 18fce21916..d38e82c27e 100755
--- a/demos/gptj_causal_lm/python/README.md
+++ b/demos/gptj_causal_lm/python/README.md
@@ -101,3 +101,58 @@ tensor([[[ 8.4078, 7.2025, 5.1148, ..., -6.6914, -6.7891, -6.6537],
grad_fn=)
predicted word: a
```
+
+# Pipeline mode with server side tokenization and detokenization
+
+This variant offloads the tokenization and detokenization steps from the client to the server. OVMS can convert a string proto into a `2D U8` tensor and pass the data to the tokenization custom node. This way, tokens for the `gpt-j-6b` model are generated automatically and the response is returned as text instead of a probability vector.
+
+![diagram](../../../src/custom_nodes/tokenizer/diagram.svg)
+
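+The snippet below sketches the essential client-side exchange, mirroring `dag_client.py` shipped with this demo: the request carries plain text in the `texts` input and the pipeline returns text in the `autocompletions_string` output.
+
+```python
+import grpc
+import numpy as np
+from tensorflow import make_tensor_proto, make_ndarray
+from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
+
+# Connect to the OVMS gRPC endpoint started below
+stub = prediction_service_pb2_grpc.PredictionServiceStub(grpc.insecure_channel("localhost:9000"))
+
+request = predict_pb2.PredictRequest()
+request.model_spec.name = "my_gpt_pipeline"
+# OVMS converts the string proto into a 2D U8 tensor for the tokenizer custom node
+request.inputs["texts"].CopyFrom(make_tensor_proto(np.array(["Neurons are fascinating"])))
+
+response = stub.Predict(request, 10.0)
+# The detokenizer custom node returns text instead of a probability vector
+print(make_ndarray(response.outputs["autocompletions_string"])[0].decode("utf-8"))
+```
+
+The complete client with the generation loop is used in the `Run example client` step below.
+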
+## Prepare environment
+
+Use the `make` command to prepare the custom node libraries, the BlingFire tokenization models and the configuration file.
+
+```bash
+make
+```
+
+Workspace should look as follows:
+
+```bash
+tree workspace
+workspace
+├── config.json
+├── lib
+│ ├── libdetokenizer.so
+│ └── libtokenizer.so
+└── tokenizers
+ ├── gpt2.bin
+ └── gpt2.i2w
+
+2 directories, 5 files
+```
+
+Start OVMS with prepared workspace:
+
+```bash
+docker run -d --rm -p 9000:9000 \
+ -v $(pwd)/onnx:/onnx:ro \
+ -v $(pwd)/workspace:/workspace:ro \
+ openvino/model_server \
+ --port 9000 \
+ --config_path /workspace/config.json
+```
+
+Install Tensorflow Serving API package:
+```bash
+pip install --upgrade pip
+pip install tensorflow-serving-api==2.11.0
+```
+
+Run example client:
+
+```bash
+python3 dag_client.py --url localhost:9000 --model_name my_gpt_pipeline --input "Neurons are fascinating"
+
+0.5515012741088867 Neurons are fascinating cells that are responsible for the transmission of information from one brain region to another. They are also responsible for the production of hormones and neurotransmitters that are responsible for the regulation of mood, sleep, appetite, and sexual function.
+```
diff --git a/demos/gptj_causal_lm/python/app.py b/demos/gptj_causal_lm/python/app.py
index 59ab24a8a7..d265c92c5d 100644
--- a/demos/gptj_causal_lm/python/app.py
+++ b/demos/gptj_causal_lm/python/app.py
@@ -48,13 +48,11 @@
predicted_token_id = token = torch.argmax(torch.nn.functional.softmax(torch.Tensor(results[0,-1,:]),dim=-1),dim=-1)
word = tokenizer.decode(predicted_token_id)
input_sentence += word
- # print(f"Iteration: {iteration}\nLast predicted token: {predicted_token_id}\nLast latency: {last_latency}s\n{input_sentence}")
print(word, end='', flush=True)
iteration += 1
if predicted_token_id == args['eos_token_id']:
break
-# split line below to 3 different lines
print(f"Number of iterations: {iteration}")
print(f"First latency: {first_latency}s")
print(f"Last latency: {last_latency}s")
diff --git a/demos/gptj_causal_lm/python/config.json b/demos/gptj_causal_lm/python/config.json
new file mode 100644
index 0000000000..c4b4b9f1d4
--- /dev/null
+++ b/demos/gptj_causal_lm/python/config.json
@@ -0,0 +1,123 @@
+{
+ "model_config_list": [
+ {"config": {
+ "name": "gpt",
+ "base_path": "/onnx",
+ "plugin_config": {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": 1}}}
+ ],
+ "pipeline_config_list": [
+ {
+ "name": "my_gpt_pipeline",
+ "inputs": [
+ "texts"
+ ],
+ "nodes": [
+ {
+ "name": "node_1",
+ "type": "custom",
+ "inputs": [
+ {
+ "texts": {
+ "node_name": "request",
+ "data_item": "texts"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "input_ids",
+ "alias": "out"
+ },
+ {
+ "data_item": "attention_mask",
+ "alias": "attention"
+ }
+ ],
+ "library_name": "tokenizer",
+ "params": {
+ "max_ids_arr_length": "4096",
+ "model_path": "/workspace/tokenizers/gpt2.bin"
+ }
+ },
+ {
+ "name": "gpt_node",
+ "model_name": "gpt",
+ "type": "DL model",
+ "inputs": [
+ {
+ "input_ids": {
+ "node_name": "node_1",
+ "data_item": "out"
+ }
+ },
+ {
+ "attention_mask": {
+ "node_name": "node_1",
+ "data_item": "attention"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "logits",
+ "alias": "logits"
+ }
+ ]
+ },
+ {
+ "name": "node_2",
+ "type": "custom",
+ "inputs": [
+ {
+ "logits": {
+ "node_name": "gpt_node",
+ "data_item": "logits"
+ }
+ },
+ {
+ "input_ids": {
+ "node_name": "node_1",
+ "data_item": "out"
+ }
+ },
+ {
+ "attention_mask": {
+ "node_name": "node_1",
+ "data_item": "attention"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "data_item": "texts",
+ "alias": "texts"
+ }
+ ],
+ "library_name": "detokenizer",
+ "params": {
+ "max_buffer_length": "8192",
+ "model_path": "/workspace/tokenizers/gpt2.i2w"
+ }
+ }
+ ],
+ "outputs": [
+ {
+ "autocompletions_string": {
+ "node_name": "node_2",
+ "data_item": "texts"
+ }
+ }
+ ]
+ }
+ ],
+ "custom_node_library_config_list": [
+ {
+ "name": "tokenizer",
+ "base_path": "/workspace/lib/libtokenizer.so"
+ },
+ {
+ "name": "detokenizer",
+ "base_path": "/workspace/lib/libdetokenizer.so"
+ }
+ ]
+}
diff --git a/demos/gptj_causal_lm/python/dag_client.py b/demos/gptj_causal_lm/python/dag_client.py
new file mode 100644
index 0000000000..e81ec9c183
--- /dev/null
+++ b/demos/gptj_causal_lm/python/dag_client.py
@@ -0,0 +1,48 @@
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+import time
+import argparse
+import grpc
+import numpy as np
+from tensorflow_serving.apis import predict_pb2
+from tensorflow_serving.apis import prediction_service_pb2_grpc
+from tensorflow import make_tensor_proto, make_ndarray
+
+parser = argparse.ArgumentParser(description='Demo for GPT-J causal LM DAG requests using Tensorflow Serving gRPC API')
+
+parser.add_argument('--input', required=True, help='Beginning of a sentence', type=str)
+parser.add_argument('--url', required=False, help='Url to connect to', type=str, default='localhost:9000')
+parser.add_argument('--model_name', required=False, help='Model name in the serving', type=str, default='my_gpt_pipeline')
+args = vars(parser.parse_args())
+
+channel = grpc.insecure_channel(args['url'])
+stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+
+input_sentence = args['input']
+
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = args['model_name']
+
+
+while True:
+ predict_request.inputs['texts'].CopyFrom(make_tensor_proto(np.array([input_sentence])))
+ start_time = time.time()
+ predict_response = stub.Predict(predict_request, 10.0)
+ latency = time.time() - start_time
+ results = make_ndarray(predict_response.outputs['autocompletions_string'])
+ input_sentence = results[0].decode('utf-8')
+ print(latency, input_sentence)
diff --git a/demos/gptj_causal_lm/python/requirements.txt b/demos/gptj_causal_lm/python/requirements.txt
index 7e30bb972b..0bfaa08f5b 100755
--- a/demos/gptj_causal_lm/python/requirements.txt
+++ b/demos/gptj_causal_lm/python/requirements.txt
@@ -1,7 +1,7 @@
-accelerate
+accelerate==0.17.1
numpy==1.23
-transformers
-transformers[onnx]
-onnxruntime
-torch
-ovmsclient
\ No newline at end of file
+transformers==4.26.1
+transformers[onnx]==4.26.1
+onnxruntime==1.14.1
+torch==1.13.1
+ovmsclient
diff --git a/demos/horizontal_text_detection/python/horizontal_text_detection.py b/demos/horizontal_text_detection/python/horizontal_text_detection.py
index 68baad2ee8..141fdd5c2c 100644
--- a/demos/horizontal_text_detection/python/horizontal_text_detection.py
+++ b/demos/horizontal_text_detection/python/horizontal_text_detection.py
@@ -72,7 +72,7 @@ def get_text(output):
def draw_boxes_spotting(frame, result):
output = make_ndarray(result.outputs['boxes'])
- for i in range(0, 100): # there is returned 200 detections for each image in the batch
+    for i in range(0, output.shape[0]): # a dynamic number of detected boxes is returned for each image
detection = output[i,:]
if detection[4] > 0.3:
x_min = int(detection[0])
diff --git a/demos/mediapipe/image_classification/README.md b/demos/mediapipe/image_classification/README.md
new file mode 100644
index 0000000000..16fba3c0a1
--- /dev/null
+++ b/demos/mediapipe/image_classification/README.md
@@ -0,0 +1,82 @@
+# MediaPipe Image Classification Demo {#ovms_docs_demo_mediapipe_image_classification}
+
+This guide shows how to implement a [MediaPipe](../../../docs/mediapipe.md) graph using OVMS.
+
+Example usage of a graph that contains only one model - resnet:
+
+## Prepare the repository
+
+Clone the repository and enter the mediapipe image_classification directory
+```bash
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/image_classification
+```
+
+## Download ResNet50 model
+
+```bash
+mkdir -p model/1
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.bin
+wget -P model/1 https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.xml
+```
+
+## Run OpenVINO Model Server
+```bash
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+```
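+
+For reference, the `config.json` mounted above is the one provided in this directory; it registers the MediaPipe graph under the servable name `resnetMediapipe`:
+
+```json
+{
+    "model_config_list": [],
+    "mediapipe_config_list": [{"name":"resnetMediapipe"}]
+}
+```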
+
+## Run the client:
+```bash
+cd model_server/client/python/kserve-api/samples
+
+python grpc_infer_resnet.py --model_name resnetMediapipe --grpc_port 9000 --images_numpy_path ../../imgs.npy --labels_numpy_path ../../lbs.npy --input_name in --output_name out --transpose_input False
+Image data range: 0.0 : 255.0
+Start processing:
+ Model name: resnetMediapipe
+ Iterations: 10
+ Images numpy path: ../../imgs.npy
+ Numpy file shape: (10, 3, 224, 224)
+
+Iteration 1; Processing time: 14.40 ms; speed 69.46 fps
+imagenet top results in a single batch:
+ 0 airliner 404 ; Correct match.
+Iteration 2; Processing time: 10.72 ms; speed 93.32 fps
+imagenet top results in a single batch:
+ 0 Arctic fox, white fox, Alopex lagopus 279 ; Correct match.
+Iteration 3; Processing time: 9.27 ms; speed 107.83 fps
+imagenet top results in a single batch:
+ 0 bee 309 ; Correct match.
+Iteration 4; Processing time: 8.47 ms; speed 118.02 fps
+imagenet top results in a single batch:
+ 0 golden retriever 207 ; Correct match.
+Iteration 5; Processing time: 9.17 ms; speed 109.03 fps
+imagenet top results in a single batch:
+ 0 gorilla, Gorilla gorilla 366 ; Correct match.
+Iteration 6; Processing time: 8.56 ms; speed 116.78 fps
+imagenet top results in a single batch:
+ 0 magnetic compass 635 ; Correct match.
+Iteration 7; Processing time: 8.39 ms; speed 119.16 fps
+imagenet top results in a single batch:
+ 0 peacock 84 ; Correct match.
+Iteration 8; Processing time: 8.44 ms; speed 118.44 fps
+imagenet top results in a single batch:
+ 0 pelican 144 ; Correct match.
+Iteration 9; Processing time: 8.36 ms; speed 119.55 fps
+imagenet top results in a single batch:
+ 0 snail 113 ; Correct match.
+Iteration 10; Processing time: 9.16 ms; speed 109.19 fps
+imagenet top results in a single batch:
+ 0 zebra 340 ; Correct match.
+
+processing time for all iterations
+average time: 9.10 ms; average speed: 109.89 fps
+median time: 8.50 ms; median speed: 117.65 fps
+max time: 14.00 ms; min speed: 71.43 fps
+min time: 8.00 ms; max speed: 125.00 fps
+time percentile 90: 10.40 ms; speed percentile 90: 96.15 fps
+time percentile 50: 8.50 ms; speed percentile 50: 117.65 fps
+time standard deviation: 1.76
+time variance: 3.09
+Classification accuracy: 100.00
+```
diff --git a/demos/mediapipe/image_classification/config.json b/demos/mediapipe/image_classification/config.json
new file mode 100644
index 0000000000..b82e499487
--- /dev/null
+++ b/demos/mediapipe/image_classification/config.json
@@ -0,0 +1,4 @@
+{
+ "model_config_list": [],
+ "mediapipe_config_list": [{"name":"resnetMediapipe"}]
+}
diff --git a/demos/mediapipe/image_classification/graph.pbtxt b/demos/mediapipe/image_classification/graph.pbtxt
new file mode 100644
index 0000000000..fae7b39fed
--- /dev/null
+++ b/demos/mediapipe/image_classification/graph.pbtxt
@@ -0,0 +1,45 @@
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+input_stream: "in"
+output_stream: "out"
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:session"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "resnet"
+ servable_version: "1"
+ }
+ }
+}
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:session"
+ input_stream: "B:in"
+ output_stream: "A:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "B"
+ value: "0"
+ }
+ tag_to_output_tensor_names {
+ key: "A"
+ value: "1463"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/demos/mediapipe/image_classification/subconfig.json b/demos/mediapipe/image_classification/subconfig.json
new file mode 100644
index 0000000000..18e7e84c2f
--- /dev/null
+++ b/demos/mediapipe/image_classification/subconfig.json
@@ -0,0 +1,9 @@
+{
+ "model_config_list": [
+ {"config": {
+ "name": "resnet",
+ "base_path": "model"
+ }
+ }
+ ]
+}
diff --git a/demos/mediapipe/multi_model_graph/README.md b/demos/mediapipe/multi_model_graph/README.md
new file mode 100644
index 0000000000..78beb7078e
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/README.md
@@ -0,0 +1,33 @@
+# MediaPipe Multi Model Demo {#ovms_docs_demo_mediapipe_multi_model}
+
+This guide shows how to implement a [MediaPipe](../../../docs/mediapipe.md) graph using OVMS.
+
+Example usage:
+
+## Prepare the repository
+
+Clone the repository and enter the mediapipe multi_model_graph directory
+```bash
+git clone https://github.com/openvinotoolkit/model_server.git
+cd model_server/demos/mediapipe/multi_model_graph
+```
+
+## Prepare the models
+
+```bash
+cp -r ../../../src/test/add_two_inputs_model ./
+cp -r ../../../src/test/dummy ./
+```
+
+## Run OpenVINO Model Server
+```bash
+docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000
+```
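+
+For reference, the `config.json` mounted above is the one provided in this directory; it registers the MediaPipe graph under the servable name `dummyAdd`:
+
+```json
+{
+    "model_config_list": [],
+    "mediapipe_config_list": [{"name":"dummyAdd"}]
+}
+```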
+
+## Run the client:
+Prepare a Python virtualenv according to the [kserve samples readme](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/README.md).
+```bash
+python mediapipe_multi_model_client.py --grpc_port 9000
+Output:
+[[ 3. 5. 7. 9. 11. 13. 15. 17. 19. 21.]]
+```
diff --git a/demos/mediapipe/multi_model_graph/config.json b/demos/mediapipe/multi_model_graph/config.json
new file mode 100644
index 0000000000..19a1b44353
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/config.json
@@ -0,0 +1,4 @@
+{
+ "model_config_list": [],
+ "mediapipe_config_list": [{"name":"dummyAdd"}]
+}
diff --git a/demos/mediapipe/multi_model_graph/graph.pbtxt b/demos/mediapipe/multi_model_graph/graph.pbtxt
new file mode 100644
index 0000000000..36a29cb03b
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/graph.pbtxt
@@ -0,0 +1,79 @@
+#
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+input_stream: "in1"
+input_stream: "in2"
+output_stream: "out"
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:dummy"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "dummy"
+ servable_version: "1"
+ }
+ }
+}
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:add"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "add"
+ servable_version: "1"
+ }
+ }
+}
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:dummy"
+ input_stream: "DUMMY_IN:in1"
+ output_stream: "DUMMY_OUT:dummy_output"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "DUMMY_IN"
+ value: "b"
+ }
+ tag_to_output_tensor_names {
+ key: "DUMMY_OUT"
+ value: "a"
+ }
+ }
+ }
+}
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:add"
+ input_stream: "ADD_INPUT1:dummy_output"
+ input_stream: "ADD_INPUT2:in2"
+ output_stream: "SUM:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT1"
+ value: "input1"
+ }
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT2"
+ value: "input2"
+ }
+ tag_to_output_tensor_names {
+ key: "SUM"
+ value: "sum"
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py b/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py
new file mode 100644
index 0000000000..8ef3014d33
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/mediapipe_multi_model_client.py
@@ -0,0 +1,48 @@
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import numpy as np
+import datetime
+import argparse
+import tritonclient.grpc as grpcclient
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Sends a request to the dummyAdd MediaPipe graph '
+ 'via the KServe gRPC API and prints the output')
+ parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
+ parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
+
+ args = vars(parser.parse_args())
+
+ address = "{}:{}".format(args['grpc_address'],args['grpc_port'])
+
+ triton_client = grpcclient.InferenceServerClient(
+ url=address,
+ verbose=False)
+
+ inputs = []
+ inputs.append(grpcclient.InferInput("in1", [1,10], "FP32"))
+ inputs.append(grpcclient.InferInput("in2", [1,10], "FP32"))
+ inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32))
+ inputs[1].set_data_from_numpy(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]], dtype=np.float32))
+ outputs = []
+ results = triton_client.infer(
+ model_name= "dummyAdd",
+ inputs=inputs,
+ outputs=outputs)
+ output = results.as_numpy("out")
+ print('Output:')
+ print(output)
\ No newline at end of file
diff --git a/demos/mediapipe/multi_model_graph/subconfig.json b/demos/mediapipe/multi_model_graph/subconfig.json
new file mode 100644
index 0000000000..43c53be16e
--- /dev/null
+++ b/demos/mediapipe/multi_model_graph/subconfig.json
@@ -0,0 +1,14 @@
+{
+ "model_config_list": [
+ {"config": {
+ "name": "dummy",
+ "base_path": "dummy"
+ }
+ },
+ {"config": {
+ "name": "add",
+ "base_path": "add_two_inputs_model"
+ }
+ }
+ ]
+}
diff --git a/demos/model_ensemble/python/README.md b/demos/model_ensemble/python/README.md
index b07f1bcdeb..e91764d3fe 100644
--- a/demos/model_ensemble/python/README.md
+++ b/demos/model_ensemble/python/README.md
@@ -3,7 +3,7 @@
This guide shows how to implement a model ensemble using the [DAG Scheduler](../../../docs/dag_scheduler.md).
- Let's consider you develop an application to perform image classification. There are many different models that can be used for this task. The goal is to combine results from inferences executed on two different models and calculate argmax to pick the most probable classification label.
-- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2022.2/omz_models_public_googlenet_v2_tf_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_public_resnet_50_tf_resnet_50_tf.html). Additionally, create own model **argmax** to combine and select top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both - googlenet and resnet predictions should run in parallel.
+- For this task, select two models: [googlenet-v2](https://docs.openvino.ai/2023.0/omz_models_model_googlenet_v2_tf.html) and [resnet-50](https://docs.openvino.ai/2022.2/omz_models_model_resnet_50_tf.html#doxid-omz-models-model-resnet-50-tf). Additionally, create your own model **argmax** to combine and select the top result. The aim is to perform this task on the server side with no intermediate results passed over the network. The server should take care of feeding inputs/outputs in subsequent models. Both googlenet and resnet predictions should run in parallel.
- Diagram for this pipeline would look like this:
![diagram](model_ensemble_diagram.svg)
diff --git a/demos/universal-sentence-encoder/README.md b/demos/universal-sentence-encoder/README.md
new file mode 100644
index 0000000000..80f42a2695
--- /dev/null
+++ b/demos/universal-sentence-encoder/README.md
@@ -0,0 +1,108 @@
+# Using input data in string format with the universal-sentence-encoder model {#ovms_demo_universal-sentence-encoder}
+
+
+## Download the model
+
+In this experiment we are going to use a TensorFlow model from [tfhub.dev](https://tfhub.dev/google/universal-sentence-encoder-multilingual/3).
+
+```bash
+curl --create-dirs https://storage.googleapis.com/tfhub-modules/google/universal-sentence-encoder-multilingual/3.tar.gz -o universal-sentence-encoder-multilingual/1/3.tar.gz
+tar -xzf universal-sentence-encoder-multilingual/1/3.tar.gz -C universal-sentence-encoder-multilingual/1/
+rm universal-sentence-encoder-multilingual/1/3.tar.gz
+chmod -R 755 universal-sentence-encoder-multilingual
+tree universal-sentence-encoder-multilingual/
+
+universal-sentence-encoder-multilingual/
+└── 1
+ ├── assets
+ ├── saved_model.pb
+ └── variables
+ ├── variables.data-00000-of-00001
+ └── variables.index
+
+```
+
+## Optionally build OVMS with a CPU extension library for the SentencepieceTokenizer layer
+
+The universal-sentence-encoder-multilingual model includes a SentencepieceTokenizer layer which is not supported by OpenVINO at the moment. It can, however, be implemented using a [CPU extension](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/custom_operations/user_ie_extensions/sentence_piece), which is a dynamic library performing the execution of the model layer.
+The SentencepieceTokenizer layer expects a list of strings on its input. The CPU extension changes the input format to an array with UINT8 precision and shape `[-1]`, which is a serialized representation of the list of strings in the form of bytes. When this extension is deployed in OpenVINO Model Server, you don't need to worry about the serialization as it is handled internally. The model server accepts the input in string format and performs the conversion to the format expected by OpenVINO transparently.
+
+The image `openvino/model_server:2023.0` will include a ready-to-use OpenVINO Model Server with the CPU extension. It can also be built from source using the commands:
+
+```bash
+git clone https://github.com/openvinotoolkit/model_server
+cd model_server
+make docker_build OV_USE_BINARY=0
+cd ..
+
+```
+
+## Start the model server in a container
+When the new docker image is built, you can start the service with a command:
+```bash
+docker run -d --name ovms -p 9000:9000 -p 8000:8000 -v $(pwd)/universal-sentence-encoder-multilingual:/model openvino/model_server:latest --model_name usem --model_path /model --cpu_extension /ovms/lib/libuser_ov_extensions.so --plugin_config '{"NUM_STREAMS": 1}' --port 9000 --rest_port 8000
+```
+
+Check the container logs to confirm successful start:
+```bash
+docker logs ovms
+```
+
+
+## Send string data as inference request
+
+OpenVINO Model Server can accept the input in the form of strings. Below is a code snippet based on the `tensorflow_serving_api` Python library:
+```python
+data = np.array(["string1", "string1", "string_n"])
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = "my_model"
+predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
+predict_response = prediction_service_stub.Predict(predict_request, 10.0)
+```
+
+Here is a basic client execution:
+```bash
+pip install --upgrade pip
+pip install -r model_server/demos/universal-sentence-encoder/requirements.txt
+python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9000 --string "I enjoy taking long walks along the beach with my dog."
+processing time 6.931 ms.
+Output shape (1, 512)
+Output subset [-0.00552395 0.00599533 -0.01480555 0.01098945 -0.09355522 -0.08445048
+ -0.02802683 -0.05219319 -0.0675998 0.03127321 -0.03223499 -0.01282092
+ 0.06131846 0.02626886 -0.00983501 0.00298059 0.00141201 0.03229365
+ 0.06957124 0.01543707]
+
+```
+
+The same can be achieved using the REST API and even a simple `curl` command:
+
+```bash
+curl -X POST http://localhost:8000/v1/models/usem:predict \
+-H 'Content-Type: application/json' \
+-d '{"instances": ["dog", "Puppies are nice.", "I enjoy taking long walks along the beach with my dog."]}'
+```
+
+
+## Compare results with TFS
+
+The same client code can be used to send requests to a TensorFlow Serving component. The API is fully compatible.
+
+Start TFS container:
+```bash
+docker run -it -p 8500:8500 -p 9500:9500 -v $(pwd)/universal-sentence-encoder-multilingual:/models/usem -e MODEL_NAME=usem tensorflow/serving --port=9500 --rest_api_port=8500
+```
+
+
+Run the client
+```bash
+python model_server/demos/universal-sentence-encoder/send_strings.py --grpc_port 9500 --input_name inputs --output_name outputs --string "I enjoy taking long walks along the beach with my dog."
+
+processing time 12.167000000000002 ms.
+Output shape (1, 512)
+Output subset [-0.00552387 0.00599531 -0.0148055 0.01098951 -0.09355522 -0.08445048
+ -0.02802679 -0.05219323 -0.06759984 0.03127313 -0.03223493 -0.01282088
+ 0.06131843 0.02626882 -0.00983502 0.00298053 0.00141208 0.03229369
+ 0.06957125 0.01543701]
+
+```
+
diff --git a/demos/universal-sentence-encoder/requirements.txt b/demos/universal-sentence-encoder/requirements.txt
new file mode 100644
index 0000000000..da859d5245
--- /dev/null
+++ b/demos/universal-sentence-encoder/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow-serving-api==2.11.0
+
diff --git a/demos/universal-sentence-encoder/send_strings.py b/demos/universal-sentence-encoder/send_strings.py
new file mode 100644
index 0000000000..5f4de71da3
--- /dev/null
+++ b/demos/universal-sentence-encoder/send_strings.py
@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import grpc
+import numpy as np
+from tensorflow import make_tensor_proto, make_ndarray
+import datetime
+import argparse
+from tensorflow_serving.apis import predict_pb2
+from tensorflow_serving.apis import prediction_service_pb2_grpc
+
+
+parser = argparse.ArgumentParser(description='Send a request with string input to OVMS or TensorFlow Serving')
+parser.add_argument('--grpc_address',required=False, default='localhost', help='Specify url to grpc service. default:localhost')
+parser.add_argument('--grpc_port',required=False, default=9000, help='Specify port to grpc service. default: 9000')
+parser.add_argument('--input_name',required=False, default='inputs', help='Specify input tensor name. default: inputs')
+parser.add_argument('--output_name',required=False, default='outputs', help='Specify output name. default: outputs')
+parser.add_argument('--model_name', default='usem', help='Define model name, must be same as is in service. default: usem')
+parser.add_argument('--string',required=True, default='', help='String to query.')
+args = vars(parser.parse_args())
+
+channel = grpc.insecure_channel("{}:{}".format(args['grpc_address'],args['grpc_port']))
+stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+
+
+data = np.array([args['string']])
+predict_request = predict_pb2.PredictRequest()
+predict_request.model_spec.name = args['model_name']
+predict_request.inputs[args['input_name']].CopyFrom(make_tensor_proto(data))
+start_time = datetime.datetime.now()
+predict_response = stub.Predict(predict_request, 10.0)
+end_time = datetime.datetime.now()
+duration = (end_time - start_time).total_seconds() * 1000
+print("processing time", duration, "ms.")
+output = make_ndarray(predict_response.outputs[args['output_name']])
+print("Output shape", output.shape)
+print("Output subset", output[0, :20])
diff --git a/demos/using_onnx_model/python/config.json b/demos/using_onnx_model/python/config.json
index a64d65e51d..c25117b762 100644
--- a/demos/using_onnx_model/python/config.json
+++ b/demos/using_onnx_model/python/config.json
@@ -7,7 +7,7 @@
"layout": "NHWC:NCHW",
"shape": "(1,224,224,3)",
"plugin_config": {
- "CPU_THROUGHPUT_STREAMS": "1"
+ "NUM_STREAMS": "1"
}
}
}
diff --git a/docs/accelerators.md b/docs/accelerators.md
index aa59c101b6..866059477d 100644
--- a/docs/accelerators.md
+++ b/docs/accelerators.md
@@ -11,63 +11,15 @@ docker run -u $(id -u):$(id -g) -v ${PWD}/models:/models:rw openvino/ubuntu20_de
mv ${PWD}/models/public/resnet-50-tf/FP32 ${PWD}/models/public/resnet-50-tf/1
```
-
-## Starting the server with the Intel® Neural Compute Stick 2
-
-[Intel Movidius Neural Compute Stick 2](https://software.intel.com/en-us/neural-compute-stick) can be employed by OVMS OpenVINO Model Server via
-[the MYRIAD plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_MYRIAD.html). It must be visible and accessible on the host machine.
-
-NCS devices should be reported by the `lsusb` command, printing out `ID 03e7:2485`.
-
-To start the server with Neural Compute Stick use either of the two options:
-
-1. Recommended, without the docker privileged mode and mounting only the usb devices.
-
-@sphinxdirective
-.. code-block:: sh
-
- docker run --rm -it -u 0 --device-cgroup-rule='c 189:* rmw' -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev/bus/usb:/dev/bus/usb -p 9001:9001 openvino/model_server \
- --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD
-
-@endsphinxdirective
-
-2. Less securely, in the docker privileged mode and mounting all devices.
- ```bash
- docker run --rm -it --net=host -u root --privileged -v ${PWD}/models/public/resnet-50-tf:/opt/model -v /dev:/dev -p 9001:9001 openvino/model_server \
- --model_path /opt/model --model_name resnet --port 9001 --target_device MYRIAD
- ```
-
-## Starting a Docker Container with HDDL
-
-To run a container that is using the HDDL accelerator, _hddldaemon_ must be running on the host machine.
-You must set up the environment (the OpenVINO package must be pre-installed) and start _hddldaemon_ on the host before starting a container.
-Refer to the steps from [OpenVINO installation guides](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_docker_linux.html#running-the-image-on-intel-vision-accelerator-design-with-intel-movidius-vpus).
-
-An example of a command starting a server with HDDL:
-```bash
-
-# --device=/dev/ion:/dev/ion mounts the accelerator device
-# -v /var/tmp:/var/tmp enables communication with _hddldaemon_ running on the host machine
-docker run --rm -it --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest \
---model_path /opt/model --model_name resnet --port 9001 --target_device HDDL
-```
-
-Check out our recommendations for [throughput optimization on HDDL](performance_tuning.md).
-
-> **NOTE**:
-> the OpenVINO Model Server process within the container communicates with _hddldaemon_ via unix sockets in the `/var/tmp` folder.
-> It requires RW permissions in the docker container security context.
-> It is recommended to start the docker container in the same context as the account starting _hddldaemon_. For example, if you start the _hddldaemon_ as root, add `--user root` to the `docker run` command.
-
## Starting a Docker Container with Intel integrated GPU, Intel® Data Center GPU Flex Series and Intel® Arc™ GPU
-The [GPU plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for
+The [GPU plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_GPU.html) uses the Intel Compute Library for
Deep Neural Networks ([clDNN](https://01.org/cldnn)) to infer deep neural networks. For inference execution, it employs Intel® Processor Graphics including
Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics.
Before using GPU as OpenVINO Model Server target device, you need to:
-- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2022.2/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices)
+- install the required drivers - refer to [OpenVINO installation guide](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-4-optional-configure-inference-on-non-cpu-devices)
- start the docker container with the additional parameter of `--device /dev/dri` to pass the device context
- set the parameter of `--target_device` to `GPU`.
- use the `openvino/model_server:latest-gpu` image, which contains GPU dependencies
@@ -107,7 +59,7 @@ Use device `/dev/dxg` instead of `/dev/dri` and mount the volume `/usr/lib/wsl`:
@sphinxdirective
.. code-block:: sh
- docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl --group-add=$(stat -c "%g" /dev/dri/render* | head -n 1) -u $(id -u):$(id -g) \
+ docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl -u $(id -u):$(id -g) \
-v ${PWD}/models/public/resnet-50-tf:/opt/model -p 9001:9001 openvino/model_server:latest-gpu \
--model_path /opt/model --model_name resnet --port 9001 --target_device GPU
@@ -121,7 +73,7 @@ If you need to build the OpenVINO Model Server with different driver version, re
## Using Multi-Device Plugin
If you have multiple inference devices available (e.g. Myriad VPUs and CPU) you can increase inference throughput by enabling the Multi-Device Plugin.
-It distributes Inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Running_on_multiple_devices.html) documentation.
+It distributes inference requests among multiple devices, balancing out the load. For more detailed information read OpenVINO’s [Multi-Device plugin documentation](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Running_on_multiple_devices.html).
To use this feature in OpenVINO Model Server, you can choose one of two ways:
@@ -136,7 +88,7 @@ echo '{"model_config_list": [
"name": "resnet",
"base_path": "/opt/model",
"batch_size": "1",
- "target_device": "MULTI:MYRIAD,CPU"}
+ "target_device": "MULTI:GPU,CPU"}
}]
}' >> models/public/resnet-50-tf/config.json
```
@@ -153,7 +105,7 @@ openvino/model_server:latest --config_path /opt/model/config.json --port 9001
```bash
docker run -d --net=host -u root --privileged --name ie-serving --rm -v ${PWD}/models/public/resnet-50-tf/:/opt/model:ro -v \
-/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:MYRIAD,CPU'
+/dev:/dev -p 9001:9001 openvino/model_server:latest model --model_path /opt/model --model_name resnet --port 9001 --target_device 'MULTI:GPU,CPU'
```
-The deployed model will perform inference on both Intel Movidius Neural Compute Stick and CPU.
+The deployed model will perform inference on both GPU and CPU.
@@ -161,7 +113,7 @@ The total throughput will be roughly equal to the sum of CPU and Intel Movidius
## Using Heterogeneous Plugin
-The [HETERO plugin](https://docs.openvino.ai/2022.2/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model
+The [HETERO plugin](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Hetero_execution.html) makes it possible to distribute inference load of one model
among several computing devices. That way different parts of the deep learning network can be executed by devices best suited to their type of calculations.
OpenVINO automatically divides the network to optimize the process.
@@ -183,7 +135,7 @@ echo '{"model_config_list": [
## Using AUTO Plugin
-[Auto Device](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit, it doesn’t bind to a specific type of HW device.
+[Auto Device](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_supported_plugins_AUTO.html) (or AUTO in short) is a new special “virtual” or “proxy” device in the OpenVINO toolkit; it doesn’t bind to a specific type of HW device.
AUTO solves the complexity in application required to code a logic for the HW device selection (through HW devices) and then, on the deducing the best optimization settings on that device.
AUTO always chooses the best device, if compiling model fails on this device, AUTO will try to compile it on next best device until one of them succeeds.
Make sure you have passed the devices and access to the devices you want to use in for the docker image. For example with:
@@ -243,16 +195,22 @@ The docker image of OpenVINO Model Server including support for NVIDIA can be bu
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
- make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3
+ make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master
+ cd ..
```
Check also [building from sources](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md).
Example command to run container with NVIDIA support:
```bash
- docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA
+ docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device NVIDIA
+```
+
+For models with layers not supported by the NVIDIA plugin, you can use a virtual plugin `HETERO`, which can use multiple devices listed after the colon:
+```bash
+ docker run -it --gpus all -p 9000:9000 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --port 9000 --target_device HETERO:NVIDIA,CPU
```
Check the supported [configuration parameters](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-configuration-parameters) and [supported layers](https://github.com/openvinotoolkit/openvino_contrib/tree/master/modules/nvidia_plugin#supported-layers-and-limitations)
-Currently the AUTO, MULTI and HETERO virual plugins do not support NVIDIA plugin as an alternative device.
\ No newline at end of file
+Currently the AUTO and MULTI virtual plugins do not support the NVIDIA plugin as an alternative device.
\ No newline at end of file
diff --git a/docs/advanced_topics.md b/docs/advanced_topics.md
index 1f9ab27931..4804027758 100644
--- a/docs/advanced_topics.md
+++ b/docs/advanced_topics.md
@@ -19,7 +19,7 @@ Implement any CPU layer, that is not support by OpenVINO yet, as a shared librar
[Learn more](../src/example/SampleCpuExtension/README.md)
## Model Cache
-Leverage the OpenVINO [model caching](https://docs.openvino.ai/latest/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device.
+Leverage the OpenVINO [model caching](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html) feature to speed up subsequent model loading on a target device.
[Learn more](model_cache.md)
diff --git a/docs/binary_input.md b/docs/binary_input.md
index 67eb8e9fe2..a9a3b349fc 100644
--- a/docs/binary_input.md
+++ b/docs/binary_input.md
@@ -1,4 +1,4 @@
-# Support for Binary Input Data {#ovms_docs_binary_input}
+# Support for Binary Encoded Image Input Data {#ovms_docs_binary_input}
@sphinxdirective
@@ -20,4 +20,4 @@ automatically from JPEG/PNG to OpenVINO friendly format using built-in [OpenCV](
- [TensorFlow Serving API](./binary_input_tfs.md)
- [KServe API](./binary_input_kfs.md)
- It's worth noting that with KServe API, you can also send raw data (that does not require processing by OpenCV) in binary form via REST. This makes KServe API more performant choice while working with REST interface. The guide linked above explains how to work with both regular data in binary format as well as JPEG/PNG encoded images.
\ No newline at end of file
+It's worth noting that with the KServe API, you can also send raw data with or without image encoding via the REST API. This makes the KServe REST API a more performant choice compared to the JSON format in the TFS API. The guides linked above explain how to work with both regular data in binary format as well as JPEG/PNG encoded images.
\ No newline at end of file
diff --git a/docs/binary_input_kfs.md b/docs/binary_input_kfs.md
index 3295aad342..f15c153d05 100644
--- a/docs/binary_input_kfs.md
+++ b/docs/binary_input_kfs.md
@@ -4,21 +4,27 @@
KServe API allows sending the model input data in a variety of formats inside the [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data-1) objects or in `raw_input_contents` field of [ModelInferRequest](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
-When the data is sent in the `bytes_contents` field of `InferTensorContents` and input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. The `BYTES` datatype is dedicated to binary encoded **images** and if it's set, the data **must** be placed in `bytes_contents` or in `raw_input_contents` if batch size is equal to 1.
+When the data is sent to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions and the input `datatype` is set to `BYTES`, such input is interpreted as a binary encoded image. Data of such inputs **must** be placed in `bytes_contents` or in `raw_input_contents`.
+
+If the data is located in `raw_input_contents`, you need to precede the data of every image in the batch with 4 bytes (little endian) containing the size of that image. For example, if the batch contained three images of sizes 370, 480 and 500 bytes, the content of raw_input_contents[index_of_the_input] would look like this:
+<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image>
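+
+As an illustration (not a complete client), the length-prefixed buffer described above could be assembled in Python like this; the file names are just placeholders:
+
+```python
+import struct
+
+def pack_images(image_paths):
+    buffer = b""
+    for path in image_paths:
+        with open(path, "rb") as f:
+            data = f.read()
+        # 4-byte little-endian size of the image followed by the image bytes
+        buffer += struct.pack("<I", len(data)) + data
+    return buffer
+
+# raw_input_contents[index_of_the_input] = pack_images(["img1.jpeg", "img2.jpeg", "img3.jpeg"])
+```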
Note, that while the model metadata reports the inputs shape with layout `NHWC`, the binary data must be sent with
shape: `[N]` with datatype: `BYTES`. Where `N` represents number of images converted to string bytes.
-When sending data in the array format, the shape and datatype gives information on how to interpret bytes in the contents. For binary encoded data, the only information given by the `shape` field is the amount of images in the batch. On the server side, the bytes in each element of the `bytes_contents` field are loaded, resized to match model input shape and converted to the OpenVINO-friendly array format by OpenCV.
+When sending data in the array format, the shape and datatype give information on how to interpret the bytes in the contents. For binary encoded data, the only information given by the `shape` field is the number of images in the batch. On the server side, the bytes of every image in the batch are loaded, resized to match the model input shape and converted to the OpenVINO-friendly array format by OpenCV.
## HTTP
### JPEG / PNG encoded images
-KServe API also allows sending binary encoded data via HTTP interface. The tensor binary data is provided in the request body, after JSON object. While the JSON part contains information required to route the data to the target model and run inference properly, the data itself, in the binary format is placed right after the JSON.
+KServe API also allows sending encoded images via the HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Similar to gRPC, the `datatype` of such an input needs to be `BYTES`. The tensor binary data is provided in the request body, after the JSON object. While the JSON part contains the information required to route the data to the target model and run inference properly, the data itself, in binary format, is placed right after the JSON. Therefore, you need to precede the data of every image with 4 bytes (little endian) containing the size of that image and specify their combined size in the `binary_data_size` parameter.
+
+For binary inputs, the `parameters` map in the JSON part contains a `binary_data_size` field for each binary input that indicates the size of the data on that input. Since there are no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. To send a batch of images you need to precede the data of every image with 4 bytes (little endian) containing the size of that image and specify their combined size in `binary_data_size`. For example, if a batch contained three images of sizes 370, 480 and 500 bytes, the content of the input buffer inside the binary extension would look like this:
+<0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image>
+In that case `binary_data_size` would be 1350 (370 + 480 + 500).
+The `set_data_from_numpy` function in the Triton client library that we use in our [REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py) automatically converts given images to this format.
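+
+For example, a minimal sketch using the Triton HTTP client (model, input and file names are placeholders) could look like this; `set_data_from_numpy` with `binary_data=True` adds the length prefixes and sets `binary_data_size` for you:
+
+```python
+import numpy as np
+import tritonclient.http as httpclient
+
+client = httpclient.InferenceServerClient(url="localhost:8000")
+
+# read a batch of encoded images from disk
+image_data = []
+for path in ["img1.jpeg", "img2.jpeg", "img3.jpeg"]:
+    with open(path, "rb") as f:
+        image_data.append(f.read())
+
+infer_input = httpclient.InferInput("0", [len(image_data)], "BYTES")
+infer_input.set_data_from_numpy(np.array(image_data, dtype=np.object_), binary_data=True)
+
+results = client.infer(model_name="resnet", inputs=[infer_input])
+```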
-For binary inputs, the `parameters` map in the JSON part contains `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there's no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. Therefore, to send a batch of different images, specify their sizes in `binary_data_size` field as a list with sizes of all images in the batch.
-The list must be formed as a string, so for example, for 3 images in the batch, you may pass - `"9821,12302,7889"`.
If the request contains only one input `binary_data_size` parameter can be omitted - in this case whole buffer is treated as a input image.
For HTTP request headers, `Inference-Header-Content-Length` header must be provided to give the length of the JSON object, and `Content-Length` continues to give the full body length (as HTTP requires). See an extended example with the request headers, and multiple images in the batch:
@@ -42,7 +48,7 @@ For the Raw Data binary inputs `binary_data_size` parameter can be omitted since
## Usage examples
-Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))
+Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/grpc_infer_binary_resnet.py))
Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/develop/client/python/kserve-api/samples/README.md))
diff --git a/docs/binary_input_tfs.md b/docs/binary_input_tfs.md
index c2475e93e5..9d565e415f 100644
--- a/docs/binary_input_tfs.md
+++ b/docs/binary_input_tfs.md
@@ -5,7 +5,7 @@
TensorFlow Serving API allows sending the model input data in a variety of formats inside the [TensorProto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) objects.
Array data is passed inside the `tensor_content` field, which represents the input data buffer.
-When the data is sent in the `string_val` field, such input is interpreted as a binary encoded image.
+When the data is sent in the `string_val` field to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions, such input is interpreted as a binary encoded image.
Note, that while the model metadata reports the inputs shape with layout NHWC, the binary data must be sent with
shape: [N] with dtype: DT_STRING. Where N represents number of images converted to string bytes.
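+
+For illustration, a minimal Python sketch (model, input and file names are placeholders) of sending a batch of JPEG files in the `string_val` field could look like this:
+
+```python
+import grpc
+from tensorflow import make_tensor_proto
+from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
+
+with open("img1.jpeg", "rb") as f1, open("img2.jpeg", "rb") as f2:
+    images = [f1.read(), f2.read()]  # N = 2 binary encoded images
+
+request = predict_pb2.PredictRequest()
+request.model_spec.name = "resnet"
+# a list of bytes is packed into string_val with dtype DT_STRING and shape [N]
+request.inputs["0"].CopyFrom(make_tensor_proto(images, shape=[len(images)]))
+
+channel = grpc.insecure_channel("localhost:9000")
+stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+response = stub.Predict(request, 10.0)
+```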
@@ -14,7 +14,7 @@ When sending data in the array format, all bytes are in the same sequence in `te
## HTTP
-TensorFlow Serving API also allows sending binary encoded data via HTTP interface. The binary data needs to be Base64 encoded and put into `inputs` or `instances` field as a map in form:
+TensorFlow Serving API also allows sending encoded images via the HTTP interface to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. The binary data needs to be Base64 encoded and put into the `inputs` or `instances` field as a map in the form:
```
<input_name>: {"b64": <base64 encoded data>}
diff --git a/docs/build_from_source.md b/docs/build_from_source.md
index 340aa47ef2..97dbd021e5 100644
--- a/docs/build_from_source.md
+++ b/docs/build_from_source.md
@@ -5,7 +5,7 @@ This document gives information how to build docker images and the binary packag
## Prerequisites
1. [Docker Engine](https://docs.docker.com/engine/)
-1. Ubuntu 20.04 or RedHat 8.7 host
+1. Ubuntu 20.04, Ubuntu 22.04 or RedHat 8.7 host
1. make
1. bash
@@ -14,8 +14,8 @@ This document gives information how to build docker images and the binary packag
Makefile located in root directory of this repository contains all targets needed to build docker images and binary packages.
It contains `docker_build` target which by default builds multiple docker images:
-- `openvino/model_server:latest` - smallest release image containing only neccessary files to run model server on CPU, NCS and HDDL
-- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU
+- `openvino/model_server:latest` - smallest release image containing only necessary files to run model server on CPU
+- `openvino/model_server:latest-gpu` - release image containing support for Intel GPU and CPU
- `openvino/model_server:latest-nginx-mtls` - release image containing examplary NGINX MTLS configuration
- `openvino/model_server-build:latest` - image with builder environment containing all the tools to build OVMS
@@ -58,6 +58,16 @@ Select base OS:
make docker_build BASE_OS=redhat
```
+### `BASE_OS_TAG_UBUNTU`
+
+Select ubuntu base image version:
+- `20.04` ubuntu:20.04 (default value)
+- `22.04` ubuntu:22.04
+
+```bash
+make docker_build BASE_OS_TAG_UBUNTU=22.04
+```
+
Example:
@@ -67,8 +77,9 @@ Example:
Parameter used to control which GPU driver version will be installed. Supported versions:
| OS | Versions |
|---|---|
-| Ubuntu | 22.35.24055 (default), 22.10.22597, 21.48.21782, 20.35.17767 |
-| RedHat | 22.28.23726 (default), 22.10.22597, 21.38.21026, 20.35.17767 |
+| Ubuntu22 | 23.13.26032 (default), 22.35.24055, 22.10.22597, 21.48.21782 |
+| Ubuntu20 | 22.43.24595 (default), 22.35.24055, 22.10.22597, 21.48.21782 |
+| RedHat | 22.43.24595 (default), 22.28.23726, 22.10.22597, 21.38.21026 |
Additionally it is possible to specify custom (pre-production) drivers by providing location to NEO Runtime packages on local disk. Contact Intel representative to get the access to the pre-production drivers.
Warning: _Maintained only for Ubuntu base OS._
@@ -111,23 +122,64 @@ Use together with `OV_CONTRIB_BRANCH` to specify which branch from [OpenVINO con
Example:
```bash
-make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=releases/2022/3 OV_CONTRIB_BRANCH=releases/2022/3
-```
-```bash
-docker run -it --gpus all -p 9178:9178 -v ${PWD}/models/public/resnet-50-tf:/opt/model openvino/model_server:latest-cuda --model_path /opt/model --model_name resnet --target_device NVIDIA
+make docker_build NVIDIA=1 OV_USE_BINARY=0 OV_SOURCE_BRANCH=master OV_CONTRIB_BRANCH=master
```
+Note: In order to build the image with Red Hat UBI 8.7 as the base OS, it is required to use a host with a Red Hat subscription and entitlements in `/etc/pki/entitlement` and `/etc/rhsm`.
+They are required to install several build dependencies.
+
### `OV_USE_BINARY`
By default set to `1`. When set to `0`, OpenVINO will be built from sources and `DLDT_PACKAGE_URL` will be omitted.
-Use `OV_SOURCE_BRANCH` to select [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch. By default `master` will be used.
-Warning: _Maintained only for Ubuntu base OS._
+Use `OV_SOURCE_BRANCH` and `OV_SOURCE_ORG` to select the [OpenVINO repository](https://github.com/openvinotoolkit/openvino) branch and fork. By default, branch `master` and org `openvinotoolkit` will be used.
+
+Example:
+```bash
+make docker_build OV_USE_BINARY=0 OV_SOURCE_BRANCH= OV_SOURCE_ORG=
+```
+
+### `RUN_TESTS`
+
+Enables or disables unit test execution as part of the docker image building.
+- `0` Unit tests are skipped
+- `1` Unit tests are executed (default)
+
+```bash
+make docker_build RUN_TESTS=0
+```
+
+Running the unit tests will make the build take longer and consume a bit more RAM.
+
+### `CHECK_COVERAGE`
+
+Enables or disables calculating the unit test coverage as part of the docker image building.
+- `0` Checking the coverage is skipped
+- `1` Checking the coverage is included
+
+```bash
+make docker_build CHECK_COVERAGE=1
+```
+
+Checking the coverage requires running the unit tests, so it will increase build time and consume more RAM.
+
+### `JOBS`
+
+Number of compilation jobs. By default it is set to the number of CPU cores. On hosts with low RAM, this value can be reduced to avoid out of memory errors during the compilation.
+
+```bash
+make docker_build JOBS=2
+```
+
+
+### `MEDIAPIPE_DISABLE`
+
+When set to `0`, OpenVINO&trade; Model Server will be built with [MediaPipe](mediapipe.md) support. Default value: `0`.
Example:
```bash
-make docker_build OV_USE_BINARY=0 OV_SOURCE_BRANCH=
+make docker_build MEDIAPIPE_DISABLE=0
```
-
+
Read more detailed usage in [developer guide](https://github.com/openvinotoolkit/model_server/blob/develop/docs/developer_guide.md).
diff --git a/docs/clients_kfs.md b/docs/clients_kfs.md
index 2161ac2344..0c1e20c211 100644
--- a/docs/clients_kfs.md
+++ b/docs/clients_kfs.md
@@ -331,50 +331,54 @@ When creating a Python-based client application, you can use Triton client libra
.. code-block:: python
- from tritonclient.grpc import service_pb2, service_pb2_grpc
- from tritonclient.utils import *
+ import tritonclient.grpc as grpclient
- client = grpcclient.InferenceServerClient("localhost:9000")
+ triton_client = grpclient.InferenceServerClient(
+ url="address",
+ ssl=False,
+ verbose=False)
+
image_data = []
with open("image_path", 'rb') as f:
image_data.append(f.read())
inputs = []
- inputs.append(service_pb2.ModelInferRequest().InferInputTensor())
- inputs[0].name = args['input_name']
- inputs[0].datatype = "BYTES"
- inputs[0].shape.extend([1])
- inputs[0].contents.bytes_contents.append(image_data[0])
+ inputs.append(grpclient.InferInput('input_name', [1], "BYTES"))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
outputs = []
- outputs.append(service_pb2.ModelInferRequest().InferRequestedOutputTensor())
- outputs[0].name = "output_name"
+ outputs.append(grpclient.InferRequestedOutput("output_name"))
- request = service_pb2.ModelInferRequest()
- request.model_name = "model_name'"
- request.inputs.extend(inputs)
- request.outputs.extend(outputs)
- response = grpc_stub.ModelInfer(request)
+ results = triton_client.infer(model_name="model_name",
+ inputs=inputs,
+ outputs=outputs)
.. tab:: python [REST]
.. code-block:: python
- import requests
- import json
-
- url = f"http://{address}/v2/models/{model_name}/infer"
- http_session = requests.session()
+ import tritonclient.http as httpclient
+ triton_client = httpclient.InferenceServerClient(
+ url="address",
+ ssl=False,
+ ssl_options=None,
+ verbose=False)
+
image_data = []
- image_binary_size = []
with open("image_path", 'rb') as f:
image_data.append(f.read())
- image_binary_size.append(len(image_data[-1]))
- image_binary_size_str = ",".join(map(str, image_binary_size))
- inference_header = {"inputs":[{"name":input_name,"shape":[batch_i],"datatype":"BYTES","parameters":{"binary_data_size":image_binary_size_str}}]}
- inference_header_binary = json.dumps(inference_header).encode()
+ inputs = []
+ inputs.append(httpclient.InferInput('input_name', [1], "BYTES"))
+ nmpy = np.array(image_data , dtype=np.object_)
+ inputs[0].set_data_from_numpy(nmpy)
- results = http_session.post(url, inference_header_binary + b''.join(image_data), headers={"Inference-Header-Content-Length":str(len(inference_header_binary))})
+ outputs = []
+ outputs.append(httpclient.InferRequestedOutput("output_name"))
+
+ results = triton_client.infer(model_name="model_name",
+ inputs=inputs,
+ outputs=outputs)
.. tab:: cpp [GRPC]
@@ -387,29 +391,26 @@ When creating a Python-based client application, you can use Triton client libra
std::unique_ptr client;
tc::InferenceServerGrpcClient::Create(&client, "localhost:9000");
- std::vector shape{1, 10};
+ std::vector<int64_t> shape{1};
tc::InferInput* input;
- tc::InferInput::Create(&input, "input_name", shape, "FP32");
+ tc::InferInput::Create(&input, "input_name", shape, "BYTES");
std::shared_ptr input_ptr;
input_ptr.reset(input)
- std::ifstream fileImg("image_path", std::ios::binary);
- fileImg.seekg(0, std::ios::end);
- int bufferLength = fileImg.tellg();
- fileImg.seekg(0, std::ios::beg);
-
- char* buffer = new char[bufferLength];
- fileImg.read(buffer, bufferLength);
+ std::ifstream file(fileName, std::ios::binary);
+ file.unsetf(std::ios::skipws);
+ std::streampos fileSize;
- std::vector input_data = std::vector(buffer, buffer + bufferLength);
- input_ptr->AppendRaw(input_data);
+ file.seekg(0, std::ios::end);
+ fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
+ input_ptr->AppendFromString({oss.str()});
tc::InferOptions options("model_name");
tc::InferResult* result;
client->Infer(&result, options, inputs);
- input->Reset();
-
- delete buffer;
}
.. tab:: cpp [REST]
@@ -425,27 +426,24 @@ When creating a Python-based client application, you can use Triton client libra
std::vector shape{1};
tc::InferInput* input;
- tc::InferInput::Create(&input, input_name, shape, "BYTES");
+ tc::InferInput::Create(&input, "input_name", shape, "BYTES");
std::shared_ptr input_ptr;
input_ptr.reset(input)
- std::ifstream fileImg("image_path", std::ios::binary);
- fileImg.seekg(0, std::ios::end);
- int bufferLength = fileImg.tellg();
- fileImg.seekg(0, std::ios::beg);
-
- char* buffer = new char[bufferLength];
- fileImg.read(buffer, bufferLength);
+ std::ifstream file(fileName, std::ios::binary);
+ file.unsetf(std::ios::skipws);
+ std::streampos fileSize;
- std::vector input_data = std::vector(buffer, buffer + bufferLength);
- input_ptr->AppendRaw(input_data);
+ file.seekg(0, std::ios::end);
+ fileSize = file.tellg();
+ file.seekg(0, std::ios::beg);
+ std::ostringstream oss;
+ oss << file.rdbuf();
+ input_ptr->AppendFromString({oss.str()});
tc::InferOptions options("model_name");
tc::InferResult* result;
client->Infer(&result, options, inputs);
- input->Reset();
-
- delete buffer;
}
.. tab:: java
@@ -470,8 +468,10 @@ When creating a Python-based client application, you can use Triton client libra
input.addShape(1);
FileInputStream imageStream = new FileInputStream("image_path");
- request.clearRawInputContents();
- request.addRawInputContents(ByteString.readFrom(imageStream));
+ InferTensorContents.Builder input_data = InferTensorContents.newBuilder();
+ input_data.addBytesContents(ByteString.readFrom(imageStream));
+ input.setContents(input_data);
+ request.addInputs(0, input);
ModelInferResponse response = grpc_stub.modelInfer(request.build());
@@ -488,20 +488,24 @@ When creating a Python-based client application, you can use Triton client libra
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
- inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
- &grpc_client.ModelInferRequest_InferInputTensor{
- Name: "0",
- Datatype: "BYTES",
- Shape: []int64{1},
- },
+ bytes, err := ioutil.ReadFile(fileName)
+ contents := grpc_client.InferTensorContents{}
+ contents.BytesContents = append(contents.BytesContents, bytes)
+
+ inferInput := grpc_client.ModelInferRequest_InferInputTensor{
+ Name: "0",
+ Datatype: "BYTES",
+ Shape: []int64{1},
+ Contents: &contents,
}
- bytes, err := ioutil.ReadFile(fileName)
- modelInferRequest.RawInputContents = append(modelInferRequest.RawInputContents, bytes)
+ inferInputs := []*grpc_client.ModelInferRequest_InferInputTensor{
+ &inferInput,
+ }
modelInferRequest := grpc_client.ModelInferRequest{
- ModelName: "model_name",
- ModelVersion: "model_version",
+ ModelName: modelName,
+ ModelVersion: modelVersion,
Inputs: inferInputs,
}
@@ -514,6 +518,9 @@ When creating a Python-based client application, you can use Triton client libra
echo -n '{"inputs” : [{"name" : "0", "shape" : [1], "datatype" : "BYTES"}]}' > request.json
stat --format=%s request.json
66
+ printf "%x\n" `stat -c "%s" ./image.jpeg`
+ 1c21
+ echo -n -e '\x21\x1c\x00\x00' >> request.json
cat ./image.jpeg >> request.json
curl --data-binary "@./request.json" -X POST http://localhost:8000/v2/models/resnet/versions/0/infer -H "Inference-Header-Content-Length: 66"
@@ -546,7 +553,7 @@ When creating a Python-based client application, you can use Triton client libra
data = np.array([1.0, 2.0, ..., 1000.0])
infer_input = httpclient.InferInput("input_name", data.shape, "FP32")
infer_input.set_data_from_numpy(data)
- results = client.infer("model_name", [infer_input]
+ results = client.infer("model_name", [infer_input])
.. tab:: cpp [GRPC]
@@ -563,7 +570,7 @@ When creating a Python-based client application, you can use Triton client libra
tc::InferInput* input;
tc::InferInput::Create(&input, "input_name", shape, "FP32");
std::shared_ptr input_ptr;
- input_ptr.reset(input)
+ input_ptr.reset(input);
std::vector input_data(10);
for (size_t i = 0; i < 10; ++i) {
@@ -592,7 +599,7 @@ When creating a Python-based client application, you can use Triton client libra
tc::InferInput* input;
tc::InferInput::Create(&input, "input_name", shape, "FP32");
std::shared_ptr input_ptr;
- input_ptr.reset(input)
+ input_ptr.reset(input);
std::vector input_data(10);
for (size_t i = 0; i < 10; ++i) {
@@ -684,4 +691,98 @@ When creating a Python-based client application, you can use Triton client libra
@endsphinxdirective
+### Request Prediction on a string
+
+@sphinxdirective
+.. tab:: python [GRPC]
+
+ .. code-block:: python
+
+ import numpy as np
+ import tritonclient.grpc as grpcclient
+
+ client = grpcclient.InferenceServerClient("localhost:9000")
+ data = ""
+ input = np.array([data.encode('utf-8')], dtype=np.object_)
+ infer_input = grpcclient.InferInput("input_name", [1], "BYTES")
+ infer_input.set_data_from_numpy(input)
+ results = client.infer("model_name", [infer_input])
+
+.. tab:: python [REST]
+
+ .. code-block:: python
+
+ import numpy as np
+ import tritonclient.http as httpclient
+
+ client = httpclient.InferenceServerClient("localhost:9000")
+ data = ""
+ input = np.array([data.encode('utf-8')], dtype=np.object_)
+ infer_input = httpclient.InferInput("input_name", [1], "BYTES")
+ infer_input.set_data_from_numpy(input)
+ results = client.infer("model_name", [infer_input])
+
+.. tab:: cpp [GRPC]
+
+ .. code-block:: cpp
+
+ #include "grpc_client.h"
+
+ namespace tc = triton::client;
+
+ int main(int argc, char** argv) {
+ std::unique_ptr<tc::InferenceServerGrpcClient> client;
+ tc::InferenceServerGrpcClient::Create(&client, "localhost:9000");
+
+ tc::InferInput* input;
+ tc::InferInput::Create(&input, "input_name", {1}, "BYTES");
+ std::shared_ptr<tc::InferInput> input_ptr;
+ input_ptr.reset(input);
+ input_ptr->AppendFromString({std::string("")});
+ std::vector<tc::InferInput*> inputs = {input_ptr.get()};
+
+ tc::InferOptions options("model_name");
+ tc::InferResult* results;
+ client->Infer(&results, options, inputs);
+ std::shared_ptr<tc::InferResult> results_ptr;
+ results_ptr.reset(results);
+ return 0;
+ }
+
+.. tab:: cpp [REST]
+
+ .. code-block:: cpp
+
+ #include "http_client.h"
+
+ namespace tc = triton::client;
+
+ int main(int argc, char** argv) {
+ std::unique_ptr<tc::InferenceServerHttpClient> client;
+ tc::InferenceServerHttpClient::Create(&client, "localhost:9000");
+
+ tc::InferInput* input;
+ tc::InferInput::Create(&input, "input_name", {1}, "BYTES");
+ std::shared_ptr<tc::InferInput> input_ptr;
+ input_ptr.reset(input);
+ input_ptr->AppendFromString({std::string("")});
+ std::vector<tc::InferInput*> inputs = {input_ptr.get()};
+
+ tc::InferOptions options("model_name");
+ tc::InferResult* results;
+ client->Infer(&results, options, inputs);
+ std::shared_ptr<tc::InferResult> results_ptr;
+ results_ptr.reset(results);
+ return 0;
+ }
+
+.. tab:: curl
+
+ .. code-block:: sh
+
+ curl -X POST http://localhost:9000/v2/models/model_name/infer \
+ -H 'Content-Type: application/json' \
+ -d '{"inputs" : [ {"name" : "input_name", "shape" : [ 1 ], "datatype" : "BYTES", "data" : [""]} ]}'
+
+@endsphinxdirective
For complete usage examples see [Kserve samples](https://github.com/openvinotoolkit/model_server/tree/develop/client/python/kserve-api/samples).
diff --git a/docs/clients_tfs.md b/docs/clients_tfs.md
index e65b78b8bd..4e1b07c57a 100644
--- a/docs/clients_tfs.md
+++ b/docs/clients_tfs.md
@@ -285,6 +285,60 @@ When creating a Python-based client application, there are two packages on PyPi
@endsphinxdirective
+### Request Prediction on a string
+
+@sphinxdirective
+
+.. tab:: ovmsclient [GRPC]
+
+ .. code-block:: python
+
+ from ovmsclient import make_grpc_client
+
+ client = make_grpc_client("localhost:9000")
+ data = [""]
+ inputs = {"input_name": data}
+ results = client.predict(inputs=inputs, model_name="my_model")
+
+.. tab:: ovmsclient [REST]
+
+ .. code-block:: python
+
+ from ovmsclient import make_http_client
+
+ client = make_http_client("localhost:8000")
+
+ data = [""]
+ inputs = {"input_name": data}
+ results = client.predict(inputs=inputs, model_name="my_model")
+
+.. tab:: tensorflow-serving-api
+
+ .. code-block:: python
+
+ import grpc
+ from tensorflow_serving.apis import prediction_service_pb2_grpc, predict_pb2
+ from tensorflow import make_tensor_proto
+
+ channel = grpc.insecure_channel("localhost:9000")
+ prediction_service_stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
+
+ data = [""]
+ predict_request = predict_pb2.PredictRequest()
+ predict_request.model_spec.name = "my_model"
+ predict_request.inputs["input_name"].CopyFrom(make_tensor_proto(data))
+ predict_response = prediction_service_stub.Predict(predict_request, 1)
+ results = predict_response.outputs["output_name"]
+
+.. tab:: curl
+
+ .. code-block:: sh
+
+ curl -X POST http://localhost:8000/v1/models/my_model:predict \
+ -H 'Content-Type: application/json' \
+ -d '{"instances": [{"input_name": ""}]}'
+
+@endsphinxdirective
For complete usage examples see [ovmsclient samples](https://github.com/openvinotoolkit/model_server/tree/releases/2022/1/client/python/ovmsclient/samples).
@sphinxdirective
diff --git a/docs/custom_node_development.md b/docs/custom_node_development.md
index aba0516bd6..cb7798c531 100644
--- a/docs/custom_node_development.md
+++ b/docs/custom_node_development.md
@@ -123,6 +123,27 @@ Just add include statement like:
#include "opencv2/core.hpp"
```
+## String support
+There are special considerations when handling, in custom nodes, inputs sent by clients as strings. Such data, when received by the OVMS frontend, is automatically converted to a 2D array with shape [-1,-1]. An example of a custom node using this feature is our [Tokenizer](https://github.com/openvinotoolkit/model_server/tree/develop/src/custom_nodes/tokenizer).
+
+### inputs
+When strings are sent to a custom node that has a 2-dimensional shape and U8 precision, OVMS, after receiving a request containing such inputs, converts them to a 2-dimensional U8 array of shape [number of strings, length of the longest string + 1] with padding filled with zeros. For example, a batch of three strings ["String_123", "", "zebra"] would be converted to:
+```
+['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ""
+'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra"
+```
+
+### outputs
+
+When the name of a custom node output is suffixed with _string, its shape has 2 dimensions and its precision is U8, OVMS treats the data of such an output as an array that contains a string in every row with padding filled with zeros, and automatically converts the data of such outputs to strings. For example, the U8 array:
+```
+['S', 't', 'r', 'i', 'n', 'g', '_', '1', '2', '3', 0, // String_123
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ""
+'z', 'e', 'b', 'r', 'a', 0, 0, 0, 0, 0, 0] // "zebra"
+```
+would be converted to ["String_123", "", "zebra"].
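+
+As a rough illustration of both conversions (this is not OVMS code, just a numpy sketch of the behavior described above):
+
+```python
+import numpy as np
+
+def strings_to_u8_array(strings):
+    # [number of strings, length of the longest string + 1], zero padded
+    width = max(len(s) for s in strings) + 1
+    array = np.zeros((len(strings), width), dtype=np.uint8)
+    for row, text in enumerate(strings):
+        array[row, :len(text)] = np.frombuffer(text.encode(), dtype=np.uint8)
+    return array
+
+def u8_array_to_strings(array):
+    # strip the zero padding from every row
+    return [bytes(row).rstrip(b"\x00").decode() for row in array]
+
+print(u8_array_to_strings(strings_to_u8_array(["String_123", "", "zebra"])))
+# ['String_123', '', 'zebra']
+```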
+
## Building
Custom node library can be compiled using any tool. It is recommended to follow the example based
diff --git a/docs/deploying_server.md b/docs/deploying_server.md
index e52ca2e180..fd60c0560f 100644
--- a/docs/deploying_server.md
+++ b/docs/deploying_server.md
@@ -13,7 +13,7 @@ This is a step-by-step guide on how to deploy OpenVINO™ Model Server on Li
- [Docker Engine](https://docs.docker.com/engine/) installed
- Intel® Core™ processor (6-13th gen.) or Intel® Xeon® processor (1st to 4th gen.)
- Linux, macOS or Windows via [WSL](https://docs.microsoft.com/en-us/windows/wsl/)
-- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_supported_plugins_Supported_Devices.html). Accelerators are tested only on bare-metal Linux hosts.
+- (optional) AI accelerators [supported by OpenVINO](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Working_with_devices.html). Accelerators are tested only on bare-metal Linux hosts.
### Launch Model Server Container
@@ -74,37 +74,95 @@ If everything is set up correctly, you will see 'zebra' prediction in the output
## Deploying Model Server on Baremetal (without container)
It is possible to deploy Model Server outside of container.
-To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20 or RHEL8.
-Find latest binary package in [release](https://github.com/openvinotoolkit/model_server/releases) page.
-Alternatively it is possible to build package from source:
+To deploy Model Server on baremetal, use pre-compiled binaries for Ubuntu20, Ubuntu22 or RHEL8.
-```bash
-git clone https://github.com/openvinotoolkit/model_server
-cd model_server
-make docker_build
-```
+@sphinxdirective
-The `ovms.tar.gz` package will appear in `dist/ubuntu` or `dist/redhat` directory.
-Unpack the package:
+.. tab:: Ubuntu 20.04
-```bash
-tar -xzvf dist/ubuntu/ovms.tar.gz
-```
+ Download precompiled package:
+
+ .. code-block:: sh
-Install required libraries depending on the OS.
-For Ubuntu 20.04:
-```bash
-apt update -y && apt install -y libpugixml1v5 libtbb2
-```
-For RedHat 8.7:
-```bash
-microdnf install -y pkg-config && rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm
-```
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu20.tar.gz
+
+ or build it yourself:
+
+ .. code-block:: sh
+
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build
+ # Unpack the package
+ tar -xzvf dist/ubuntu/ovms.tar.gz
+
+ Install required libraries:
+
+ .. code-block:: sh
+
+ sudo apt update -y && sudo apt install -y libpugixml1v5 libtbb2
+
+.. tab:: Ubuntu 22.04
+
+ Download precompiled package:
+
+ .. code-block:: sh
+
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_ubuntu22.tar.gz
+
+ or build it yourself:
+
+ .. code-block:: sh
+
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build BASE_OS_TAG_UBUNTU=22.04
+ # Unpack the package
+ tar -xzvf dist/ubuntu/ovms.tar.gz
+
+ Install required libraries:
+
+ .. code-block:: sh
+
+ sudo apt update -y && sudo apt install -y libpugixml1v5
+
+.. tab:: RHEL 8.7
+
+ Download precompiled package:
+
+ .. code-block:: sh
+
+ wget https://github.com/openvinotoolkit/model_server/releases/download/v2023.0/ovms_redhat.tar.gz
+
+ or build it yourself:
+
+ .. code-block:: sh
+
+ # Clone the model server repository
+ git clone https://github.com/openvinotoolkit/model_server
+ cd model_server
+ # Build docker images (the binary is one of the artifacts)
+ make docker_build BASE_OS=redhat
+ # Unpack the package
+ tar -xzvf dist/redhat/ovms.tar.gz
+
+ Install required libraries:
+
+ .. code-block:: sh
+
+ sudo dnf install -y pkg-config && sudo rpm -ivh https://vault.centos.org/centos/8/AppStream/x86_64/os/Packages/tbb-2018.2-9.el8.x86_64.rpm
+
+@endsphinxdirective
Start the server:
```bash
wget https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/resnet50-binary-0001/FP32-INT1/resnet50-binary-0001.{xml,bin} -P models/resnet50/1
+
./ovms/bin/ovms --model_name resnet --model_path models/resnet50
```
@@ -115,7 +173,7 @@ Learn more about model server [starting parameters](parameters.md).
> **NOTE**:
> When serving models on [AI accelerators](accelerators.md), some additional steps may be required to install device drivers and dependencies.
-> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/latest/openvino_docs_install_guides_configurations_header.html) documentation.
+> Learn more in the [Additional Configurations for Hardware](https://docs.openvino.ai/2023.0/openvino_docs_install_guides_configurations_header.html) documentation.
## Deploying Model Server in Kubernetes
diff --git a/docs/dynamic_shape_dynamic_model.md b/docs/dynamic_shape_dynamic_model.md
index aae42a41be..8b1c71e5a3 100644
--- a/docs/dynamic_shape_dynamic_model.md
+++ b/docs/dynamic_shape_dynamic_model.md
@@ -8,7 +8,7 @@ Enable dynamic shape by setting the `shape` parameter to range or undefined:
- `--shape "(1,3,200:500,200:500)"` when model is supposed to support height and width values in a range of 200-500. Note that any dimension can support range of values, height and width are only examples here.
> Note that some models do not support dynamic dimensions. Learn more about supported model graph layers including all limitations
-on [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html).
+on [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html).
Another option to use dynamic shape feature is to export the model with dynamic dimension using Model Optimizer. OpenVINO Model Server will inherit the dynamic shape and no additional settings are needed.
diff --git a/docs/features.md b/docs/features.md
index 22e392ae0e..af13d1dee6 100644
--- a/docs/features.md
+++ b/docs/features.md
@@ -8,6 +8,7 @@
ovms_docs_dag
ovms_docs_binary_input
+ ovms_docs_text
ovms_docs_model_version_policy
ovms_docs_shape_batch_layout
ovms_docs_online_config_changes
@@ -16,6 +17,7 @@
ovms_docs_dynamic_input
ovms_docs_c_api
ovms_docs_advanced
+ ovms_docs_mediapipe
@endsphinxdirective
diff --git a/docs/mediapipe.md b/docs/mediapipe.md
new file mode 100644
index 0000000000..7d7772824b
--- /dev/null
+++ b/docs/mediapipe.md
@@ -0,0 +1,194 @@
+# Integration with MediaPipe (preview) {#ovms_docs_mediapipe}
+
+@sphinxdirective
+
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+
+ ovms_docs_mediapipe_calculators
+
+@endsphinxdirective
+
+## Introduction
+MediaPipe is an open-source framework for building pipelines that perform inference over arbitrary sensory data. Using MediaPipe in OVMS allows the user to define a powerful graph from the many ready-made calculators/nodes shipped with MediaPipe, which provide the features needed to run a stable graph, such as a flow limiter node. The graph can be run either in the server or inside the application host. More information can be found in the [MediaPipe framework documentation](https://developers.google.com/mediapipe/framework/framework_concepts/overview).
+
+This guide gives information about:
+
+* How to build OVMS with MediaPipe support
+* OVMS Calculators
+* Graph proto files
+* Configuration files
+* Using MediaPipe graphs
+* Graph examples
+* Current Limitations
+
+## How to build OVMS with MediaPipe support
+Building OVMS with MediaPipe support requires passing an additional flag to the make command, for example:
+
+```bash
+MEDIAPIPE_DISABLE=0 make docker_build
+```
+
+More information about OVMS build parameters can be found [here](https://github.com/openvinotoolkit/model_server/blob/develop/docs/build_from_source.md).
+
+## Node Types
+
+Each calculator is a node of a graph, and the bulk of graph execution happens inside the calculators. OVMS has its own calculators, but graphs can also use newly developed calculators or reuse the existing calculators defined in the original MediaPipe repository.
+
+For more details, see the MediaPipe concept description - [Calculators Concept Page](https://developers.google.com/mediapipe/framework/framework_concepts/calculators) - or the OVMS-specific calculators implementation - [OVMS Calculators Concept Page](https://github.com/openvinotoolkit/model_server/blob/releases/2023/0/src/mediapipe_calculators/calculators.md).
+
+## Graph proto files
+
+Graph proto files are used to define a graph. Example content of a proto file for a graph containing ModelAPI calculator nodes:
+
+```
+
+input_stream: "in1"
+input_stream: "in2"
+output_stream: "out"
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:dummy"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "dummy"
+ servable_version: "1"
+ }
+ }
+}
+node {
+ calculator: "ModelAPISessionCalculator"
+ output_side_packet: "SESSION:add"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIOVMSSessionCalculatorOptions]: {
+ servable_name: "add"
+ servable_version: "1"
+ }
+ }
+}
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:dummy"
+ input_stream: "DUMMY_IN:in1"
+ output_stream: "DUMMY_OUT:dummy_output"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "DUMMY_IN"
+ value: "b"
+ }
+ tag_to_output_tensor_names {
+ key: "DUMMY_OUT"
+ value: "a"
+ }
+ }
+ }
+}
+node {
+ calculator: "ModelAPISideFeedCalculator"
+ input_side_packet: "SESSION:add"
+ input_stream: "ADD_INPUT1:dummy_output"
+ input_stream: "ADD_INPUT2:in2"
+ output_stream: "SUM:out"
+ node_options: {
+ [type.googleapis.com / mediapipe.ModelAPIInferenceCalculatorOptions]: {
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT1"
+ value: "input1"
+ }
+ tag_to_input_tensor_names {
+ key: "ADD_INPUT2"
+ value: "input2"
+ }
+ tag_to_output_tensor_names {
+ key: "SUM"
+ value: "sum"
+ }
+ }
+ }
+}
+
+```
+
+
+More information about MediaPipe graph proto files can be found in the [MediaPipe graphs documentation](https://developers.google.com/mediapipe/framework/framework_concepts/graphs).
+
+## Configuration files
+The MediaPipe graph configuration is placed in the same JSON file as the
+[models config file](starting_server.md).
+While models are defined in the `model_config_list` section, graphs are configured in
+the `mediapipe_config_list` section.
+
+A basic graph section template is shown below:
+
+```
+
+{
+ "model_config_list": [...],
+ "mediapipe_config_list": [
+ {
+ "name":"mediaDummy",
+ "base_path":"/mediapipe/graphs/",
+ "graph_path":"graphdummyadapterfull.pbtxt",
+ "subconfig":"subconfig_dummy.json"
+ }
+ ]
+}
+
+```
+
+Basic subconfig:
+
+```
+
+{
+ "model_config_list": [
+ {"config": {
+ "name": "dummy",
+ "base_path": "/models/dummy",
+ "shape": "(1, 10)"
+ }
+ }
+ ]
+}
+
+
+```
+Nodes in MediaPipe graphs can reference models configured both in the `model_config_list` section and in the subconfig.
+
+### MediaPipe configuration options explained
+
+|Option|Type|Description|Required|
+|:---|:---|:---|:---|
+|`"name"`|string|Graph identifier related to name field specified in gRPC/REST request|Yes|
+|`"base_path"`|string|Path to which the graph definition and subconfig file paths are relative. May be absolute or relative to the main config path. Default value is "(main config path)/"|No|
+|`"graph_path"`|string|Path to the graph proto file. May be absolute or relative to the base_path. Default value is "(base_path)/graph.pbtxt". The file has to exist.|No|
+|`"subconfig"`|string|Path to the subconfig file. May be absolute or relative to the base_path. Default value is "(base_path)/subconfig.json". A missing subconfig file does not result in an error.|No|
+
+The subconfig file may contain only the *model_config_list* section - in the same format as in the [models config file](starting_server.md).
+
+## Using MediaPipe
+
+MediaPipe graphs can use the same KServe Inference API as the models. The calls for running
+predictions are exactly the same. The request format must match the pipeline definition inputs.
+
+Graphs can be queried for their state using the [GetModelStatus](model_server_grpc_api_kfs.md)
+and [REST Model Status](model_server_rest_api_kfs.md) calls.
+
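+Below is a minimal Python sketch of running inference on a graph through the KServe gRPC API. It assumes the `mediaDummy` graph with the `in1`/`in2`/`out` streams and the `(1, 10)` dummy model shape from the examples above, a server listening on port 9000, and the `tritonclient[grpc]` package; adjust the names, shapes and address to your own setup.
+
+```python
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+# Connect to the model server gRPC endpoint (assumed to listen on localhost:9000).
+client = grpcclient.InferenceServerClient("localhost:9000")
+
+# Prepare inputs matching the graph input streams and the dummy model shape (1, 10).
+inputs = []
+for name in ("in1", "in2"):
+    infer_input = grpcclient.InferInput(name, [1, 10], "FP32")
+    infer_input.set_data_from_numpy(np.ones((1, 10), dtype=np.float32))
+    inputs.append(infer_input)
+
+# The graph name from the "mediapipe_config_list" section is used as the model name.
+response = client.infer("mediaDummy", inputs)
+print(response.as_numpy("out"))
+```
+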
+## MediaPipe Graph Examples
+
+[Image classification](../demos/mediapipe/image_classification/README.md)
+
+[Multi model](../demos/mediapipe/multi_model_graph/README.md)
+
+## Current limitations
+- This is a preview version of the MediaPipe integration, which means it is not ready to be used in production and only some of the OVMS features are supported for MediaPipe graphs.
+
+- MediaPipe graphs are supported only through the gRPC KServe API. The only supported TFS calls are get model status and config reload.
+
+- Binary inputs are not supported for MediaPipe graphs.
+
+- Public images do not include the MediaPipe feature.
+
+- Making changes in the subconfig file does not trigger a config reload. Changes in the main config are monitored and trigger a subconfig reload even if the subconfig itself has not changed.
diff --git a/docs/model_cache.md b/docs/model_cache.md
index 00d493a175..d409f948a3 100644
--- a/docs/model_cache.md
+++ b/docs/model_cache.md
@@ -1,7 +1,7 @@
# Model Cache {#ovms_docs_model_cache}
## Overview
-The Model Server can leverage a [OpenVINO™ model cache functionality](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_Model_caching_overview.html), to speed up subsequent model loading on a target device.
+The Model Server can leverage a [OpenVINO™ model cache functionality](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_Model_caching_overview.html), to speed up subsequent model loading on a target device.
The cached files make the Model Server initialization usually faster.
The boost depends on a model and a target device. The most noticable improvement will be observed with GPU devices. On other devices, like CPU, it is possible to observe no speed up effect or even slower loading process depending on used model. Test the setup before final deployment.
@@ -42,8 +42,6 @@ In case there are valid reasons to enable the model cache also for models with a
> IMPORTANT: Models imported via the custom loaders never create or use any cache.
-> IMPORTANT: Model cache can't be used with HDDL devices.
-
## Use case example
### Prepare model
diff --git a/docs/model_server_c_api.md b/docs/model_server_c_api.md
index b62e0411d6..eca9d5b4bc 100644
--- a/docs/model_server_c_api.md
+++ b/docs/model_server_c_api.md
@@ -11,6 +11,12 @@ Server functionalities are encapsulated in shared library built from OVMS source
Calling a method to start the model serving in your application initiates the OVMS as a separate thread. Then you can schedule inference both directly from app using C API and gRPC/HTTP endpoints.
+The API is versioned according to [SemVer 2.0](https://semver.org/). Calling `OVMS_ApiVersion` returns the `major` and `minor` version numbers.
+- major - incremented when new, backward incompatible changes are introduced to the API itself (API call removal, name change, parameter change)
+- minor - incremented when the API is modified in a backward compatible way (new API call added)
+
+There is no patch version number. Changes in the underlying functionality that are not related to the API itself are tracked via the OVMS version. OVMS and OpenVINO versions can be checked via logs or the `ServerMetadata` request (via KServe API).
+
### Server configuration and start
To start OVMS you need to create `OVMS_Server` object using `OVMS_ServerNew`, with set of `OVMS_ServerSettings` and `OVMS_ModelsSettings` that describe how the server should be configured. Once the server is started using `OVMS_ServerStartFromConfigurationFile` you can schedule the inferences using `OVMS_Inference`. To stop server, you must call `OVMS_ServerDelete`. While the server is alive you can schedule both in process inferences as well as use gRPC API to schedule inferences from remote machine. Optionally you can also enable HTTP service. Example how to use OVMS with C/C++ application is [here](../demos/c_api_minimal_app/README.md).
diff --git a/docs/model_server_grpc_api_kfs.md b/docs/model_server_grpc_api_kfs.md
index bcd140dca8..378307e0f0 100644
--- a/docs/model_server_grpc_api_kfs.md
+++ b/docs/model_server_grpc_api_kfs.md
@@ -45,9 +45,9 @@ Run inference with requested model or [DAG](./dag_scheduler.md).
Check KServe documentation for more [details](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
-> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) and [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for BF16 data type and there is no support for using FP16 in `InferTensorContents`. In case of sending raw images jpeg files BYTES data type should be used and data should be put in `InferTensorContents`'s `bytes_contents` or `raw_input_contents` for batch size equal to 1.
+> **NOTE**: Inference supports putting tensor buffers either in `ModelInferRequest`'s [InferTensorContents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L155) or in [raw_input_contents](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/grpc_predict_v2.proto#L202). There is no support for the BF16 data type and no support for using FP16 in `InferTensorContents`. In case of sending image files or strings, the BYTES data type should be used and the data should be put in `InferTensorContents`'s `bytes_contents` or in `raw_input_contents`.
-Also, using `BYTES` datatype it is possible to send binary encoded images that would be preprocessed by OVMS using opencv and converted to OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
+Also, using the `BYTES` datatype it is possible to send binary encoded images to a model or pipeline that has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Such input is preprocessed by OVMS using OpenCV and converted to the OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_kfs.md)
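+
+As an illustration, a minimal Python sketch of sending a JPEG file as a `BYTES` input with the `tritonclient[grpc]` package could look as follows; the model name `resnet`, the input name `0`, the output name `prob`, and the port are assumptions used only for this example:
+
+```python
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+client = grpcclient.InferenceServerClient("localhost:9000")
+
+# Read the encoded JPEG bytes; OVMS decodes them with OpenCV on the server side.
+with open("zebra.jpeg", "rb") as f:
+    image_bytes = f.read()
+
+# One element per batch item; BYTES inputs are passed as an object-dtype numpy array.
+infer_input = grpcclient.InferInput("0", [1], "BYTES")
+infer_input.set_data_from_numpy(np.array([image_bytes], dtype=np.object_))
+
+result = client.infer("resnet", [infer_input])
+print(result.as_numpy("prob"))  # "prob" is a placeholder output name
+```
+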
## See Also
diff --git a/docs/model_server_rest_api_kfs.md b/docs/model_server_rest_api_kfs.md
index 1d5f7b9792..d54c817c45 100644
--- a/docs/model_server_rest_api_kfs.md
+++ b/docs/model_server_rest_api_kfs.md
@@ -225,9 +225,11 @@ $request_output =
}
```
-Besides numerical values, it is possible to pass binary inputs using Binary Data extension:
+> Note: In `tensor_data`, elements may be presented in their multi-dimensional representation or as a flattened one-dimensional representation. Before inference execution the tensor data is flattened, and only the element count in `tensor_data` is validated.
-As a JPEG / PNG encoded images - in this case binary encoded data is loaded by OVMS using OpenCV which then converts it to OpenVINO-friendly data format for inference. For encoded inputs datatype `BYTES` is reserved.
+Besides numerical values, it is possible to pass encoded images using the Binary Data extension:
+
+As JPEG / PNG encoded images - in this case the binary encoded data is loaded by OVMS using OpenCV, which then converts it to the OpenVINO-friendly data format for inference. The input is treated as an encoded image when the datatype is `BYTES` and the model or pipeline has 4 (or 5 in case of [demultiplexing](demultiplexing.md)) shape dimensions. Every batch item of the BYTES input needs to be preceded by 4 bytes, little endian, that contain its size.
```JSON
Content-Type: application/octet-stream
@@ -243,7 +245,7 @@ Content-Length:
}
]
}
-<9472 bytes of data for model_input tensor>
+<0x00250000 (9472 as four bytes little endian)><9472 bytes of data for model_input tensor>
```
@@ -267,6 +269,8 @@ Content-Length:
<3240000 bytes of the whole data batch for model_input tensor>
```
+*Sending strings inside the binary extension also requires preceding every batch item with 4 bytes, little endian, that contain its size.
+
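+For illustration, a minimal Python sketch of building such a length-prefixed binary part of the request body (standard library only) is shown below; the file name and the 9472-byte size are assumptions matching the example above:
+
+```python
+import struct
+
+# Encoded data for a single batch item (e.g. a JPEG file or a UTF-8 encoded string).
+with open("image.jpeg", "rb") as f:
+    data = f.read()  # e.g. 9472 bytes, as in the example above
+
+# Each batch item is preceded by its size encoded as 4 bytes, little endian.
+binary_payload = struct.pack("<I", len(data)) + data
+
+# For more batch items, repeat the <4-byte size><data> pattern and concatenate.
+# The resulting bytes are appended to the request body right after the JSON part.
+```
+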
Check [how binary data is handled in OpenVINO Model Server](./binary_input.md) for more informations.
diff --git a/docs/models_repository.md b/docs/models_repository.md
index 26ee01c7a7..06fe9ea4de 100644
--- a/docs/models_repository.md
+++ b/docs/models_repository.md
@@ -1,19 +1,17 @@
# Preparing a Model Repository {#ovms_docs_models_repository}
The AI models served by OpenVINO™ Model Server must be in either of the four formats:
-- [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files
+- [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets), where the graph is represented in .bin and .xml files
- [ONNX](https://onnx.ai/), using the .onnx file
- [PaddlePaddle](https://www.paddlepaddle.org.cn/en), using .pdiparams and .pdmodel files
-- [TensorFlow](https://www.tensorflow.org/), using frozen graph format with .pb extension (preview feature)
+- [TensorFlow](https://www.tensorflow.org/), using SavedModel, MetaGraph or frozen Protobuf formats.
To use models trained in other formats you need to convert them first. To do so, use
-OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different
+OpenVINO’s [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) for IR, or different
[converters](https://onnx.ai/supported-tools.html) for ONNX.
-The feature of direct import of Tensorflow models is currently a preview feature. Currently it supports only the frozen graph and not all topologies can be used that way.
-For unsupported models you can use the Model Optimizer to convert the model to IR format.
-
The models need to be placed and mounted in a particular directory structure and according to the following rules:
+When TensorFlow models are hosted on cloud storage, they should be in the frozen format to be imported successfully.
```
tree models/
@@ -37,9 +35,15 @@ models/
│ └── 1
│ ├── model.pdiparams
│ └── model.pdmodel
-└── model5
+├── model5
+│ └── 1
+│ ├── model.pdiparams
+│ └── model.pdmodel
+└── model6
└── 1
- └── TF_fronzen_model.pb
+ ├── variables
+ └── saved_model.pb
+
```
- Each model should be stored in a dedicated directory, e.g. model1 and model2.
diff --git a/docs/ovms.png b/docs/ovms.png
deleted file mode 100644
index 2e51f20616..0000000000
Binary files a/docs/ovms.png and /dev/null differ
diff --git a/docs/ovms_quickstart.md b/docs/ovms_quickstart.md
index 5d3abace70..70705d069a 100644
--- a/docs/ovms_quickstart.md
+++ b/docs/ovms_quickstart.md
@@ -1,12 +1,12 @@
# Quickstart Guide {#ovms_docs_quick_start_guide}
-OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets)
-, [ONNX](https://onnx.ai/), PaddlePaddle[https://github.com/PaddlePaddle/Paddle] or TensorFlow format [https://www.tensorflow.org/]. You can get them by:
+OpenVINO Model Server can perform inference using pre-trained models in either [OpenVINO IR](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_IR_and_opsets.html#doxid-openvino-docs-m-o-d-g-i-r-and-opsets)
+, [ONNX](https://onnx.ai/), [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) or [TensorFlow](https://www.tensorflow.org/) format. You can get them by:
- downloading models from [Open Model Zoo](https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/2022.1/)
-- converting other formats using [Model Optimizer](https://docs.openvino.ai/2022.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
+- converting other formats using [Model Optimizer](https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
-This guide uses a [face detection model](https://docs.openvino.ai/latest/omz_models_model_face_detection_retail_0004.html) in IR format.
+This guide uses a [face detection model](https://docs.openvino.ai/2023.0/omz_models_model_face_detection_retail_0004.html) in IR format.
To quickly start using OpenVINO™ Model Server follow these steps:
1. Prepare Docker
diff --git a/docs/parameters.md b/docs/parameters.md
index 0bfc3545bf..3bff8303f7 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -6,14 +6,14 @@
| Option | Value format | Description |
|---|---|---|
| `"model_name"/"name"` | `string` | Model name exposed over gRPC and REST API.(use `model_name` in command line, `name` in json config) |
-| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows: `"/opt/ml/models/model"` `"gs://bucket/models/model"` `"s3://bucket/models/model"` `"azure://bucket/models/model"` (use `model_path` in command line, `base_path` in json config) |
-| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model.Some models don't support the reshape operation.If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors.Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2022.2/openvino_docs_IE_DG_ShapeInference.html). |
+| `"model_path"/"base_path"` | `string` | If using a Google Cloud Storage, Azure Storage or S3 path, see [cloud storage guide](./using_cloud_storage.md). The path may look as follows: `"/opt/ml/models/model"` `"gs://bucket/models/model"` `"s3://bucket/models/model"` `"azure://bucket/models/model"` The path can also be relative to the config.json location (use `model_path` in command line, `base_path` in json config) |
+| `"shape"` | `tuple/json/"auto"` | `shape` is optional and takes precedence over `batch_size`. The `shape` argument changes the model that is enabled in the model server to fit the parameters. `shape` accepts three forms of the values: * `auto` - The model server reloads the model with the shape that matches the input data matrix. * a tuple, such as `(1,3,224,224)` - The tuple defines the shape to use for all incoming requests for models with a single input. * A dictionary of shapes, such as `{"input1":"(1,3,224,224)","input2":"(1,3,50,50)", "input3":"auto"}` - This option defines the shape of every included input in the model. Some models don't support the reshape operation. If the model can't be reshaped, it remains in the original parameters and all requests with incompatible input format result in an error. See the logs for more information about specific errors. Learn more about supported model graph layers including all limitations at [Shape Inference Document](https://docs.openvino.ai/2023.0/openvino_docs_OV_UG_ShapeInference.html). |
| `"batch_size"` | `integer/"auto"` | Optional. By default, the batch size is derived from the model, defined through the OpenVINO Model Optimizer. `batch_size` is useful for sequential inference requests of the same batch size.Some models, such as object detection, don't work correctly with the `batch_size` parameter. With these models, the output's first dimension doesn't represent the batch size. You can set the batch size for these models by using network reshaping and setting the `shape` parameter appropriately.The default option of using the Model Optimizer to determine the batch size uses the size of the first dimension in the first input for the size. For example, if the input shape is `(1, 3, 225, 225)`, the batch size is set to `1`. If you set `batch_size` to a numerical value, the model batch size is changed when the service starts.`batch_size` also accepts a value of `auto`. If you use `auto`, then the served model batch size is set according to the incoming data at run time. The model is reloaded each time the input data changes the batch size. You might see a delayed response upon the first request. |
| `"layout" `| `json/string` | `layout` is optional argument which allows to define or change the layout of model input and output tensors. To change the layout (add the transposition step), specify `: