From 31ad50a3feb9b148fcc88f9c2dab431335dc01fc Mon Sep 17 00:00:00 2001 From: Damian Kalinowski Date: Mon, 17 Jun 2024 13:51:14 +0200 Subject: [PATCH] [release branch] [DOC] Links, typos and public package (#2506) Fix links /nightly/ -> /2024/ Fix some of the links from /main/ to /releases/2024/2 Fix typos CVS-144337 Public OpenVINO link --- Makefile | 6 +- README.md | 40 ++++++------- client/go/kserve-api/Dockerfile | 2 +- client/java/kserve-api/pom.xml | 2 +- client/python/ovmsclient/lib/README.md | 4 +- .../ovmsclient/lib/docs/pypi_overview.md | 4 +- .../ovmsclient/custom/management_client.py | 4 +- demos/README.md | 12 ++-- demos/age_gender_recognition/python/README.md | 2 +- demos/benchmark/cpp/README.md | 2 +- demos/benchmark/python/README.md | 2 +- .../bert_question_answering/python/README.md | 2 +- demos/common/cpp/Dockerfile | 4 +- demos/face_blur/python/README.md | 4 +- demos/face_detection/python/README.md | 2 +- demos/image_classification/cpp/README.md | 2 +- demos/image_classification/python/README.md | 2 +- demos/mediapipe/holistic_tracking/README.md | 2 +- demos/mediapipe/iris_tracking/README.md | 2 +- demos/mediapipe/multi_model_graph/README.md | 2 +- demos/mediapipe/object_detection/README.md | 2 +- demos/model_ensemble/python/README.md | 4 +- .../python/README.md | 6 +- .../python/README.md | 8 +-- .../clip_image_classification/README.md | 2 +- demos/python_demos/rag_chatbot/README.md | 2 +- .../python/README.md | 4 +- .../python/README.md | 2 +- demos/using_onnx_model/python/README.md | 4 +- .../python/README.md | 4 +- docs/accelerators.md | 4 +- docs/binary_input_kfs.md | 6 +- docs/binary_input_tfs.md | 4 +- docs/build_from_source.md | 2 +- docs/clients_kfs.md | 4 +- docs/clients_tfs.md | 4 +- docs/custom_model_loader.md | 8 +-- docs/custom_node_development.md | 30 +++++----- docs/dag_scheduler.md | 2 +- docs/deploying_server.md | 4 +- docs/dynamic_bs_auto_reload.md | 2 +- docs/dynamic_bs_demultiplexer.md | 2 +- docs/dynamic_input.md | 2 +- docs/dynamic_shape_auto_reload.md | 2 +- docs/dynamic_shape_binary_inputs.md | 4 +- docs/dynamic_shape_custom_node.md | 4 +- docs/dynamic_shape_dynamic_model.md | 2 +- docs/llm/reference.md | 6 +- docs/mediapipe.md | 4 +- docs/mediapipe_conversion.md | 2 +- docs/metrics.md | 4 +- docs/model_server_c_api.md | 2 +- docs/model_server_grpc_api_kfs.md | 4 +- docs/model_server_grpc_api_tfs.md | 8 +-- docs/model_server_rest_api_chat.md | 2 +- docs/model_server_rest_api_completions.md | 2 +- docs/model_server_rest_api_kfs.md | 12 ++-- docs/model_server_rest_api_tfs.md | 6 +- docs/ovms_quickstart.md | 4 +- docs/python_support/reference.md | 56 +++++++++---------- docs/writing_app.md | 2 +- extras/nginx-mtls-auth/get_model.sh | 2 +- src/custom_nodes/east_ocr/README.md | 2 +- src/custom_nodes/face_blur/README.md | 4 +- src/custom_nodes/horizontal_ocr/README.md | 2 +- .../image_transformation/README.md | 2 +- .../README.md | 2 +- src/example/SampleCpuExtension/README.md | 2 +- 68 files changed, 178 insertions(+), 178 deletions(-) diff --git a/Makefile b/Makefile index b342c4c4c4..44c88d48b7 100644 --- a/Makefile +++ b/Makefile @@ -154,11 +154,11 @@ ifeq ($(findstring ubuntu,$(BASE_OS)),ubuntu) ifeq ($(BASE_OS_TAG),20.04) OS=ubuntu20 INSTALL_DRIVER_VERSION ?= "22.43.24595" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu20_2024.2.0.15519.5c0f38f83f6_x86_64.tgz + DLDT_PACKAGE_URL ?= 
https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.2/linux/l_openvino_toolkit_ubuntu20_2024.2.0.15519.5c0f38f83f6_x86_64.tgz else ifeq ($(BASE_OS_TAG),22.04) OS=ubuntu22 INSTALL_DRIVER_VERSION ?= "23.22.26516" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_ubuntu22_2024.2.0.15519.5c0f38f83f6_x86_64.tgz + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.2/linux/l_openvino_toolkit_ubuntu22_2024.2.0.15519.5c0f38f83f6_x86_64.tgz endif endif ifeq ($(BASE_OS),redhat) @@ -173,7 +173,7 @@ ifeq ($(BASE_OS),redhat) endif DIST_OS=redhat INSTALL_DRIVER_VERSION ?= "23.22.26516" - DLDT_PACKAGE_URL ?= http://s3.toolbox.iotg.sclab.intel.com/ov-packages/l_openvino_toolkit_rhel8_2024.2.0.15519.5c0f38f83f6_x86_64.tgz + DLDT_PACKAGE_URL ?= https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.2/linux/l_openvino_toolkit_rhel8_2024.2.0.15519.5c0f38f83f6_x86_64.tgz endif OVMS_CPP_DOCKER_IMAGE ?= openvino/model_server diff --git a/README.md b/README.md index 36bc5ec50d..d436ab8a51 100644 --- a/README.md +++ b/README.md @@ -15,22 +15,22 @@ OpenVINO™ Model Server (OVMS) is a high-performance system for serving mod ![OVMS picture](docs/ovms_high_level.png) -The models used by the server need to be stored locally or hosted remotely by object storage services. For more details, refer to [Preparing Model Repository](https://docs.openvino.ai/nightly/ovms_docs_models_repository.html) documentation. Model server works inside [Docker containers](https://docs.openvino.ai/nightly/ovms_docs_deploying_server.html#deploying-model-server-in-docker-container), on [Bare Metal](https://docs.openvino.ai/nightly/ovms_docs_deploying_server.html#deploying-model-server-on-baremetal-without-container), and in [Kubernetes environment](https://docs.openvino.ai/nightly/ovms_docs_deploying_server.html#deploying-model-server-in-kubernetes). -Start using OpenVINO Model Server with a fast-forward serving example from the [Quickstart guide](https://docs.openvino.ai/nightly/ovms_docs_quick_start_guide.html) or explore [Model Server features](https://docs.openvino.ai/nightly/ovms_docs_features.html). +The models used by the server need to be stored locally or hosted remotely by object storage services. For more details, refer to [Preparing Model Repository](https://docs.openvino.ai/2024/ovms_docs_models_repository.html) documentation. Model server works inside [Docker containers](https://docs.openvino.ai/2024/ovms_docs_deploying_server.html#deploying-model-server-in-docker-container), on [Bare Metal](https://docs.openvino.ai/2024/ovms_docs_deploying_server.html#deploying-model-server-on-baremetal-without-container), and in [Kubernetes environment](https://docs.openvino.ai/2024/ovms_docs_deploying_server.html#deploying-model-server-in-kubernetes). +Start using OpenVINO Model Server with a fast-forward serving example from the [Quickstart guide](https://docs.openvino.ai/2024/ovms_docs_quick_start_guide.html) or explore [Model Server features](https://docs.openvino.ai/2024/ovms_docs_features.html). Read [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what’s new. 
### Key features:
-- **[NEW]** [Efficient Text Generation via OpenAI API - preview](https://docs.openvino.ai/nightly/ovms_docs_llm_reference.html)
-- [Python code execution](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html)
-- [gRPC streaming](https://docs.openvino.ai/nightly/ovms_docs_streaming_endpoints.html)
-- [MediaPipe graphs serving](https://docs.openvino.ai/nightly/ovms_docs_mediapipe.html)
-- Model management - including [model versioning](https://docs.openvino.ai/nightly/ovms_docs_model_version_policy.html) and [model updates in runtime](https://docs.openvino.ai/nightly/ovms_docs_online_config_changes.html)
-- [Dynamic model inputs](https://docs.openvino.ai/nightly/ovms_docs_shape_batch_layout.html)
-- [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/nightly/ovms_docs_dag.html) along with [custom nodes in DAG pipelines](https://docs.openvino.ai/nightly/ovms_docs_custom_node_development.html)
-- [Metrics](https://docs.openvino.ai/nightly/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
+- **[NEW]** [Efficient Text Generation via OpenAI API - preview](https://docs.openvino.ai/2024/ovms_docs_llm_reference.html)
+- [Python code execution](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html)
+- [gRPC streaming](https://docs.openvino.ai/2024/ovms_docs_streaming_endpoints.html)
+- [MediaPipe graphs serving](https://docs.openvino.ai/2024/ovms_docs_mediapipe.html)
+- Model management - including [model versioning](https://docs.openvino.ai/2024/ovms_docs_model_version_policy.html) and [model updates in runtime](https://docs.openvino.ai/2024/ovms_docs_online_config_changes.html)
+- [Dynamic model inputs](https://docs.openvino.ai/2024/ovms_docs_shape_batch_layout.html)
+- [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2024/ovms_docs_dag.html) along with [custom nodes in DAG pipelines](https://docs.openvino.ai/2024/ovms_docs_custom_node_development.html)
+- [Metrics](https://docs.openvino.ai/2024/ovms_docs_metrics.html) - metrics compatible with Prometheus standard
- Support for multiple frameworks, such as TensorFlow, PaddlePaddle and ONNX
-- Support for [AI accelerators](https://docs.openvino.ai/nightly/about-openvino/compatibility-and-support/supported-devices.html)
+- Support for [AI accelerators](https://docs.openvino.ai/2024/about-openvino/compatibility-and-support/supported-devices.html)

**Note:** OVMS has been tested on RedHat and Ubuntu. The latest publicly released docker images are based on Ubuntu and UBI.
They are stored in:
@@ -40,26 +40,26 @@ They are stored in:

## Run OpenVINO Model Server

-A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/nightly/ovms_docs_quick_start_guide.html).
+A demonstration on how to use OpenVINO Model Server can be found in [our quick-start guide](https://docs.openvino.ai/2024/ovms_docs_quick_start_guide.html).
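After the server starts, a quick way to confirm it is serving a model is a short status call with the `ovmsclient` package; in the sketch below the model name `resnet` and address `localhost:9000` are placeholders for your own deployment, not fixed values.

```python
# Minimal liveness check with the ovmsclient package (pip install ovmsclient).
# "resnet" and localhost:9000 are example values for a typical demo setup.
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")

# Returns a dictionary keyed by model version, e.g.
# {1: {"state": "AVAILABLE", "error_code": 0, "error_message": ""}}
status = client.get_model_status(model_name="resnet")
print(status)
```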
For more information on using Model Server in various scenarios you can check the following guides: -* [Model repository configuration](https://docs.openvino.ai/nightly/ovms_docs_models_repository.html) +* [Model repository configuration](https://docs.openvino.ai/2024/ovms_docs_models_repository.html) -* [Deployment options](https://docs.openvino.ai/nightly/ovms_docs_deploying_server.html) +* [Deployment options](https://docs.openvino.ai/2024/ovms_docs_deploying_server.html) -* [Performance tuning](https://docs.openvino.ai/nightly/ovms_docs_performance_tuning.html) +* [Performance tuning](https://docs.openvino.ai/2024/ovms_docs_performance_tuning.html) -* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/nightly/ovms_docs_dag.html) +* [Directed Acyclic Graph Scheduler](https://docs.openvino.ai/2024/ovms_docs_dag.html) -* [Custom nodes development](https://docs.openvino.ai/nightly/ovms_docs_custom_node_development.html) +* [Custom nodes development](https://docs.openvino.ai/2024/ovms_docs_custom_node_development.html) -* [Serving stateful models](https://docs.openvino.ai/nightly/ovms_docs_stateful_models.html) +* [Serving stateful models](https://docs.openvino.ai/2024/ovms_docs_stateful_models.html) * [Deploy using a Kubernetes Helm Chart](https://github.com/openvinotoolkit/operator/tree/main/helm-charts/ovms) * [Deployment using Kubernetes Operator](https://operatorhub.io/operator/ovms-operator) -* [Using binary input data](https://docs.openvino.ai/nightly/ovms_docs_binary_input.html) +* [Using binary input data](https://docs.openvino.ai/2024/ovms_docs_binary_input.html) @@ -73,7 +73,7 @@ For more information on using Model Server in various scenarios you can check th * [RESTful API](https://restfulapi.net/) -* [Benchmarking results](https://docs.openvino.ai/nightly/openvino_docs_performance_benchmarks.html) +* [Benchmarking results](https://docs.openvino.ai/2024/openvino_docs_performance_benchmarks.html) * [Speed and Scale AI Inference Operations Across Multiple Architectures](https://techdecoded.intel.io/essentials/speed-and-scale-ai-inference-operations-across-multiple-architectures/?elq_cid=3646480_ts1607680426276&erpm_id=6470692_ts1607680426276) - webinar recording diff --git a/client/go/kserve-api/Dockerfile b/client/go/kserve-api/Dockerfile index 50e2715b92..4d032095ef 100644 --- a/client/go/kserve-api/Dockerfile +++ b/client/go/kserve-api/Dockerfile @@ -26,7 +26,7 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34 RUN go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.4.0 # Compile API -RUN wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/src/kfserving_api/grpc_predict_v2.proto +RUN wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/src/kfserving_api/grpc_predict_v2.proto RUN echo 'option go_package = "./grpc-client";' >> grpc_predict_v2.proto RUN protoc --go_out="./" --go-grpc_out="./" ./grpc_predict_v2.proto diff --git a/client/java/kserve-api/pom.xml b/client/java/kserve-api/pom.xml index 7d8ea12007..fed2eddfc3 100644 --- a/client/java/kserve-api/pom.xml +++ b/client/java/kserve-api/pom.xml @@ -64,7 +64,7 @@ - https://raw.githubusercontent.com/openvinotoolkit/model_server/main/src/kfserving_api/grpc_predict_v2.proto + https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/src/kfserving_api/grpc_predict_v2.proto grpc_predict_v2.proto src/main/proto diff --git a/client/python/ovmsclient/lib/README.md b/client/python/ovmsclient/lib/README.md index 
444aefd8f9..141381333c 100644 --- a/client/python/ovmsclient/lib/README.md +++ b/client/python/ovmsclient/lib/README.md @@ -6,7 +6,7 @@ OVMS client library contains only the necessary dependencies, so the whole packa As OpenVINO Model Server API is compatible with TensorFlow Serving, it's possible to use `ovmsclient` with TensorFlow Serving instances on: Predict, GetModelMetadata and GetModelStatus endpoints. -See [API documentation](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/lib/docs/README.md) for details on what the library provides. +See [API documentation](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/lib/docs/README.md) for details on what the library provides. ```bash git clone https://github.com/openvinotoolkit/model_server.git @@ -136,4 +136,4 @@ results = client.predict(inputs=inputs, model_name="model") # ``` -For more details on `ovmsclient` see [API reference](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/lib/docs/README.md) \ No newline at end of file +For more details on `ovmsclient` see [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/lib/docs/README.md) \ No newline at end of file diff --git a/client/python/ovmsclient/lib/docs/pypi_overview.md b/client/python/ovmsclient/lib/docs/pypi_overview.md index f78e65ce10..dd426fca56 100644 --- a/client/python/ovmsclient/lib/docs/pypi_overview.md +++ b/client/python/ovmsclient/lib/docs/pypi_overview.md @@ -9,7 +9,7 @@ The `ovmsclient` package works both with OpenVINO™ Model Server and Tensor The `ovmsclient` can replace `tensorflow-serving-api` package with reduced footprint and simplified interface. -See [API reference](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/lib/docs/README.md) for usage details. +See [API reference](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/lib/docs/README.md) for usage details. ## Usage example @@ -38,4 +38,4 @@ results = client.predict(inputs=inputs, model_name="model") ``` -Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/main/client/python/ovmsclient/lib). \ No newline at end of file +Learn more on `ovmsclient` [documentation site](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/ovmsclient/lib). \ No newline at end of file diff --git a/client/python/ovmsclient/lib/ovmsclient/custom/management_client.py b/client/python/ovmsclient/lib/ovmsclient/custom/management_client.py index f7d79cd10f..e6f1acc686 100644 --- a/client/python/ovmsclient/lib/ovmsclient/custom/management_client.py +++ b/client/python/ovmsclient/lib/ovmsclient/custom/management_client.py @@ -26,7 +26,7 @@ def reload_servables(self): ConfigStatusResponse object with all models and their versions statuses Raises: - Exceptions for different serving reponses... + Exceptions for different serving responses... Examples: @@ -50,7 +50,7 @@ def get_servables(self): ConfigStatusResponse object with all models and their versions statuses Raises: - Exceptions for different serving reponses... + Exceptions for different serving responses... 
Examples: diff --git a/demos/README.md b/demos/README.md index a228116ddb..77dc504196 100644 --- a/demos/README.md +++ b/demos/README.md @@ -53,23 +53,23 @@ Check out the list below to see complete step-by-step examples of using OpenVINO |[CLIP image classification](python_demos/clip_image_classification/README.md) | Classify image according to provided labels using CLIP model embedded in a multi-node MediaPipe graph.| |[Seq2seq translation](python_demos/seq2seq_translation/README.md) | Translate text using seq2seq model via gRPC API.| |[Age gender recognition](age_gender_recognition/python/README.md) | Run prediction on a JPEG image using age gender recognition model via gRPC API.| -|[Horizontal Text Detection in Real-Time](horizontal_text_detection/python/README.md) | Run prediction on camera stream using a horizontal text detection model via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [horizontal_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/horizontal_ocr) and [demultiplexer](../docs/demultiplexing.md). | -|[Optical Character Recognition Pipeline](optical_character_recognition/python/README.md) | Run prediction on a JPEG image using a pipeline of text recognition and text detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [east_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/east_ocr) and [demultiplexer](../docs/demultiplexing.md). | +|[Horizontal Text Detection in Real-Time](horizontal_text_detection/python/README.md) | Run prediction on camera stream using a horizontal text detection model via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [horizontal_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/horizontal_ocr) and [demultiplexer](../docs/demultiplexing.md). | +|[Optical Character Recognition Pipeline](optical_character_recognition/python/README.md) | Run prediction on a JPEG image using a pipeline of text recognition and text detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [east_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/east_ocr) and [demultiplexer](../docs/demultiplexing.md). | |[Face Detection](face_detection/python/README.md)|Run prediction on a JPEG image using face detection model via gRPC API.| |[Single Face Analysis Pipeline](single_face_analysis_pipeline/python/README.md)|Run prediction on a JPEG image using a simple pipeline of age-gender recognition and emotion recognition models via gRPC API to analyze image with a single face. This demo uses [pipeline](../docs/dag_scheduler.md) | -|[Multi Faces Analysis Pipeline](multi_faces_analysis_pipeline/python/README.md)|Run prediction on a JPEG image using a pipeline of age-gender recognition and emotion recognition models via gRPC API to extract multiple faces from the image and analyze all of them. 
This demo uses [pipeline](../docs/dag_scheduler.md) with [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection) and [demultiplexer](../docs/demultiplexing.md) | +|[Multi Faces Analysis Pipeline](multi_faces_analysis_pipeline/python/README.md)|Run prediction on a JPEG image using a pipeline of age-gender recognition and emotion recognition models via gRPC API to extract multiple faces from the image and analyze all of them. This demo uses [pipeline](../docs/dag_scheduler.md) with [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection) and [demultiplexer](../docs/demultiplexing.md) | |[Model Ensemble Pipeline](model_ensemble/python/README.md)|Combine multiple image classification models into one [pipeline](../docs/dag_scheduler.md) and aggregate results to improve classification accuracy. | |[Image Classification](image_classification/python/README.md)|Run prediction on a JPEG image using image classification model via gRPC API.| -|[Using ONNX Model](using_onnx_model/python/README.md)|Run prediction on a JPEG image using image classification ONNX model via gRPC API in two preprocessing variants. This demo uses [pipeline](../docs/dag_scheduler.md) with [image_transformation custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/image_transformation). | +|[Using ONNX Model](using_onnx_model/python/README.md)|Run prediction on a JPEG image using image classification ONNX model via gRPC API in two preprocessing variants. This demo uses [pipeline](../docs/dag_scheduler.md) with [image_transformation custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/image_transformation). | |[Using TensorFlow Model](image_classification_using_tf_model/python/README.md)|Run image classification using directly imported TensorFlow model. | |[Person, Vehicle, Bike Detection](person_vehicle_bike_detection/python/README.md)|Run prediction on a video file or camera stream using person, vehicle, bike detection model via gRPC API.| -|[Vehicle Analysis Pipeline](vehicle_analysis_pipeline/python/README.md)|Detect vehicles and recognize their attributes using a pipeline of vehicle detection and vehicle attributes recognition models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection). | +|[Vehicle Analysis Pipeline](vehicle_analysis_pipeline/python/README.md)|Detect vehicles and recognize their attributes using a pipeline of vehicle detection and vehicle attributes recognition models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection). | |[Real Time Stream Analysis](real_time_stream_analysis/python/README.md)| Analyze RTSP video stream in real time with generic application template for custom pre and post processing routines as well as simple results visualizer for displaying predictions in the browser. 
| |[Segmentation with PaddlePaddle](segmentation_using_paddlepaddle_model/python/README.md)| Perform segmentation on an image with a PaddlePaddle model. | |[Natural Language Processing with BERT](bert_question_answering/python/README.md)|Provide a knowledge source and a query and use BERT model for question answering use case via gRPC API. This demo uses dynamic shape feature. | |[Using inputs data in string format with universal-sentence-encoder model](universal-sentence-encoder/README.md)| Handling AI model with text as the model input. | |[Benchmark App](benchmark/python/README.md)|Generate traffic and measure performance of the model served in OpenVINO Model Server.| -|[Face Blur Pipeline](face_blur/python/README.md)|Detect faces and blur image using a pipeline of object detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [face_blur custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/face_blur). | +|[Face Blur Pipeline](face_blur/python/README.md)|Detect faces and blur image using a pipeline of object detection models with a custom node for intermediate results processing via gRPC API. This demo uses [pipeline](../docs/dag_scheduler.md) with [face_blur custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/face_blur). | ## With C++ Client | Demo | Description | diff --git a/demos/age_gender_recognition/python/README.md b/demos/age_gender_recognition/python/README.md index cac20d80ec..cf5fc6e66b 100644 --- a/demos/age_gender_recognition/python/README.md +++ b/demos/age_gender_recognition/python/README.md @@ -35,7 +35,7 @@ Install python dependencies: ```bash pip3 install -r requirements.txt ``` -Run [age_gender_recognition.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/age_gender_recognition/python/age_gender_recognition.py) script to make an inference: +Run [age_gender_recognition.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/age_gender_recognition/python/age_gender_recognition.py) script to make an inference: ```bash python3 age_gender_recognition.py --image_input_path age-gender-recognition-retail-0001.jpg --rest_port 8000 ``` diff --git a/demos/benchmark/cpp/README.md b/demos/benchmark/cpp/README.md index bc133878dc..2591df5c01 100644 --- a/demos/benchmark/cpp/README.md +++ b/demos/benchmark/cpp/README.md @@ -10,7 +10,7 @@ make ``` The application can be used with any model or pipeline served in OVMS, by requesting `GetModelMetadata` endpoint and using such information to prepare synthetic inputs with matching shape and precision. -> **Note**: In this directory you can only see the code specific to the benchmark client. The code shared with other C++ demos as well as all building utilities are placed in the [common C++ directory](https://github.com/openvinotoolkit/model_server/tree/main/demos/common/cpp). +> **Note**: In this directory you can only see the code specific to the benchmark client. The code shared with other C++ demos as well as all building utilities are placed in the [common C++ directory](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/common/cpp). > **Note**: It is required that endpoint does not use dynamic shape. 
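The metadata-driven idea used by the benchmark client above (query `GetModelMetadata`, then synthesize inputs with matching shape and precision) can be sketched in Python with the `ovmsclient` package; the model name and the metadata dictionary layout below follow the ovmsclient docs and are assumptions rather than part of the C++ client.

```python
# A rough Python sketch of the benchmark strategy: read the model metadata,
# then generate synthetic inputs matching the reported shapes (static shapes
# only; float32 assumed - a complete client would map the reported dtype too).
import numpy as np
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
metadata = client.get_model_metadata(model_name="resnet")

inputs = {name: np.random.rand(*meta["shape"]).astype(np.float32)
          for name, meta in metadata["inputs"].items()}

results = client.predict(inputs=inputs, model_name="resnet")
```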
diff --git a/demos/benchmark/python/README.md b/demos/benchmark/python/README.md
index 62d92c988f..08983a5290 100644
--- a/demos/benchmark/python/README.md
+++ b/demos/benchmark/python/README.md
@@ -379,4 +379,4 @@ docker run -v ${PWD}/workspace:/workspace --network host benchmark_client -a loc
```
Many other client options together with benchmarking examples are presented in
-[an additional PDF document](https://github.com/openvinotoolkit/model_server/blob/main/docs/python-benchmarking-client-16feb.pdf).
+[an additional PDF document](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/python-benchmarking-client-16feb.pdf).
diff --git a/demos/bert_question_answering/python/README.md b/demos/bert_question_answering/python/README.md
index 64cd0d5651..a3ea9e4c00 100644
--- a/demos/bert_question_answering/python/README.md
+++ b/demos/bert_question_answering/python/README.md
@@ -4,7 +4,7 @@ This document demonstrates how to run inference requests for [BERT model](https://github.com/openvinotoolkit/open_model_zoo/tree/2022.1.0/models/intel/bert-small-uncased-whole-word-masking-squad-int8-0002) with OpenVINO Model Server. It provides question answering functionality.
-In this example docker container with [bert-client image](https://github.com/openvinotoolkit/model_server/blob/main/demos/bert_question_answering/python/Dockerfile) runs the script [bert_question_answering.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/bert_question_answering/python/bert_question_answering.py). It runs inference request for each paragraph on a given page in order to answer the provided question. Since each paragraph can have different size the functionality of dynamic shape is used.
+In this example, a docker container with the [bert-client image](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/bert_question_answering/python/Dockerfile) runs the script [bert_question_answering.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/bert_question_answering/python/bert_question_answering.py). It runs an inference request for each paragraph on a given page in order to answer the provided question. Since each paragraph can have a different size, the dynamic shape functionality is used.

NOTE: With the `min_request_token_num` parameter you can specify the minimum size of the request. If a paragraph is too short, it is concatenated with the next one until the required length is reached. When there are no paragraphs left to concatenate, the request is created with the remaining content.
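The concatenation rule from the note above can be summarized in a few lines of Python; `tokenize` and the parameter value here are hypothetical stand-ins for the demo's real tokenizer and CLI argument, so treat this as an illustration of the logic rather than the script's actual code.

```python
# Illustrative only: group paragraphs until each request reaches the minimum
# token count; whatever remains at the end forms the last request.
def group_paragraphs(paragraphs, tokenize, min_request_token_num):
    requests, current = [], ""
    for paragraph in paragraphs:
        current += paragraph
        if len(tokenize(current)) >= min_request_token_num:
            requests.append(current)
            current = ""
    if current:
        requests.append(current)
    return requests
```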
diff --git a/demos/common/cpp/Dockerfile b/demos/common/cpp/Dockerfile
index 792d033401..2b0e475669 100644
--- a/demos/common/cpp/Dockerfile
+++ b/demos/common/cpp/Dockerfile
@@ -78,9 +78,9 @@ RUN mkdir /bazel && \
    rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh

WORKDIR /input
-RUN wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_classification/input_images.txt && \
+RUN wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/image_classification/input_images.txt && \
    mkdir images && \
-    for I in `cat input_images.txt | cut -d"/" -f6 | cut -d" " -f1` ; do curl https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/$I -o images/$I --create-dirs; done
+    for I in `cat input_images.txt | cut -d"/" -f6 | cut -d" " -f1` ; do curl https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/$I -o images/$I --create-dirs; done

WORKDIR /build
COPY .bazelrc WORKSPACE /build/
diff --git a/demos/face_blur/python/README.md b/demos/face_blur/python/README.md
index 10d73bae2e..24807b5b5b 100644
--- a/demos/face_blur/python/README.md
+++ b/demos/face_blur/python/README.md
@@ -1,6 +1,6 @@
# Face Blur Pipeline Demo with OVMS {#ovms_demo_face_blur_pipeline}

-This document demonstrates how to create pipelines using object detection models from OpenVINO Model Zoo in order to blur the image. As an example, we will use [face-detection-retail-0004](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/face-detection-retail-0004/README.md) to detect multiple faces on the image. Then, for each detected face we will blur it using [face_blur](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_nodes/face_blur) example custom node.
+This document demonstrates how to create pipelines using object detection models from OpenVINO Model Zoo in order to blur the image. As an example, we will use [face-detection-retail-0004](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/face-detection-retail-0004/README.md) to detect multiple faces on the image. Then, for each detected face we will blur it using [face_blur](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_nodes/face_blur) example custom node.

## Pipeline Configuration Graph

@@ -10,7 +10,7 @@ Below is depicted graph implementing face blur pipeline execution.
It includes the following Nodes:
- Model `face-detection-retail-0004` - deep learning model which takes user image as input. Its output contains information about face coordinates and confidence levels.
-- Custom node `face_blur` - it includes C++ implementation of image blurring. By analysing the output it produces image blurred in spots detected by object detection model based on the configurable score level threshold. Custom node also resizes it to the target resolution. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [face_blur custom node](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_nodes/face_blur).
+- Custom node `face_blur` - it includes C++ implementation of image blurring. By analysing the output, it produces an image blurred in the spots detected by the object detection model, based on the configurable score level threshold. The custom node also resizes it to the target resolution. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. 
Learn more about the [face_blur custom node](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_nodes/face_blur). - Response - image blurred in spots detected by object detection model. ## Prepare workspace to run the demo diff --git a/demos/face_detection/python/README.md b/demos/face_detection/python/README.md index 39cd9c1df3..8388c8279c 100644 --- a/demos/face_detection/python/README.md +++ b/demos/face_detection/python/README.md @@ -2,7 +2,7 @@ ## Overview -The script [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_detection/python/face_detection.py) runs face detection inference requests for all the images +The script [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_detection/python/face_detection.py) runs face detection inference requests for all the images saved in `input_images_dir` directory. The script can adjust the input image size and change the batch size in the request. It demonstrates how to use diff --git a/demos/image_classification/cpp/README.md b/demos/image_classification/cpp/README.md index bf100c2e3f..942dab7e2e 100644 --- a/demos/image_classification/cpp/README.md +++ b/demos/image_classification/cpp/README.md @@ -12,7 +12,7 @@ cd model_server/demos/image_classification/cpp make ``` ->**Note**: In this directory you can only see the code specific to the benchmark client. The code shared with other C++ demos as well as all building utilities are placed in the [common C++ directory](https://github.com/openvinotoolkit/model_server/tree/main/demos/common/cpp). +>**Note**: In this directory you can only see the code specific to the benchmark client. The code shared with other C++ demos as well as all building utilities are placed in the [common C++ directory](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/common/cpp). ## Prepare classification model diff --git a/demos/image_classification/python/README.md b/demos/image_classification/python/README.md index 58aed4ea2e..2e0c4111e3 100644 --- a/demos/image_classification/python/README.md +++ b/demos/image_classification/python/README.md @@ -2,7 +2,7 @@ ## Overview -The script [image_classification.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/image_classification/python/image_classification.py) reads all images and their labels specified in the text file. It then classifies them with [ResNet50](https://docs.openvino.ai/2023.1/omz_models_model_resnet50_binary_0001.html) model and presents accuracy results. +The script [image_classification.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/image_classification/python/image_classification.py) reads all images and their labels specified in the text file. It then classifies them with [ResNet50](https://docs.openvino.ai/2023.1/omz_models_model_resnet50_binary_0001.html) model and presents accuracy results. 
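In outline, the accuracy loop of such a script could look like the sketch below; it assumes the `ovmsclient` package, a served model named `resnet` with an input named `0`, and a list file with one `<path> <label>` entry per line - all of which may differ from the actual demo script.

```python
# Condensed sketch of a classification-accuracy loop; model name, input name
# and file format are assumptions, not the demo's exact values.
import numpy as np
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")
matched = total = 0
with open("input_images.txt") as images_list:
    for line in images_list:
        path, label = line.rsplit(" ", 1)
        with open(path, "rb") as image:
            # The server can decode encoded JPEG bytes on its own.
            output = client.predict({"0": image.read()}, model_name="resnet")
        matched += int(np.argmax(output) == int(label))
        total += 1
print(f"accuracy: {100 * matched / total:.2f}%")
```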
## Download ResNet50 model diff --git a/demos/mediapipe/holistic_tracking/README.md b/demos/mediapipe/holistic_tracking/README.md index 2b67aa55a0..7de0b0eb25 100644 --- a/demos/mediapipe/holistic_tracking/README.md +++ b/demos/mediapipe/holistic_tracking/README.md @@ -79,5 +79,5 @@ Results saved to :image_0.jpg ## Real time stream analysis -For demo featuring real time stream application see [real_time_stream_analysis](https://github.com/openvinotoolkit/model_server/tree/main/demos/real_time_stream_analysis/python) +For demo featuring real time stream application see [real_time_stream_analysis](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/real_time_stream_analysis/python) diff --git a/demos/mediapipe/iris_tracking/README.md b/demos/mediapipe/iris_tracking/README.md index 98f43096ce..f3c95c4186 100644 --- a/demos/mediapipe/iris_tracking/README.md +++ b/demos/mediapipe/iris_tracking/README.md @@ -32,7 +32,7 @@ docker run -d -v $PWD/mediapipe:/mediapipe -v $PWD/ovms:/models -p 9000:9000 ope ```bash pip install -r requirements.txt # download a sample image for analysis -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/people/people2.jpeg +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/people/people2.jpeg echo "people2.jpeg" > input_images.txt # launch the client python mediapipe_iris_tracking.py --grpc_port 9000 --images_list input_images.txt diff --git a/demos/mediapipe/multi_model_graph/README.md b/demos/mediapipe/multi_model_graph/README.md index 835a1ded53..5cb7ad2e6f 100644 --- a/demos/mediapipe/multi_model_graph/README.md +++ b/demos/mediapipe/multi_model_graph/README.md @@ -20,7 +20,7 @@ cp -r ../../../src/test/dummy ./dummyAdd/ ``` ## Run OpenVINO Model Server -Prepare virtualenv according to [kserve samples readme](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/README.md) +Prepare virtualenv according to [kserve samples readme](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/kserve-api/samples/README.md) ```bash docker run -d -v $PWD:/mediapipe -p 9000:9000 openvino/model_server:latest --config_path /mediapipe/config.json --port 9000 ``` diff --git a/demos/mediapipe/object_detection/README.md b/demos/mediapipe/object_detection/README.md index 9e75a2eb1d..c31a69d75a 100644 --- a/demos/mediapipe/object_detection/README.md +++ b/demos/mediapipe/object_detection/README.md @@ -45,4 +45,4 @@ Received images with bounding boxes will be located in ./results directory. ## Real time stream analysis -For demo featuring real time stream application see [real_time_stream_analysis](https://github.com/openvinotoolkit/model_server/tree/main/demos/real_time_stream_analysis/python) +For demo featuring real time stream application see [real_time_stream_analysis](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/real_time_stream_analysis/python) diff --git a/demos/model_ensemble/python/README.md b/demos/model_ensemble/python/README.md index 8235d1a3ab..5456705ba7 100644 --- a/demos/model_ensemble/python/README.md +++ b/demos/model_ensemble/python/README.md @@ -24,7 +24,7 @@ make The steps in `Makefile` are: 1. Download and use the models from [open model zoo](https://github.com/openvinotoolkit/open_model_zoo). -2. Use [python script](https://github.com/openvinotoolkit/model_server/blob/main/tests/models/argmax_sum.py) located in this repository. 
Since it uses tensorflow to create models in _saved model_ format, hence tensorflow pip package is required.
+2. Use [python script](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/tests/models/argmax_sum.py) located in this repository. Since it uses TensorFlow to create models in the _saved model_ format, the tensorflow pip package is required.
3. Prepare the argmax model with `(1, 1001)` input shapes to match the googlenet and resnet output shapes. The generated model will sum inputs and calculate the index with the highest value. The model output will indicate the most likely predicted class from the ImageNet* dataset.
4. Convert models to IR format and [prepare models repository](../../../docs/models_repository.md).
@@ -54,7 +54,7 @@ models

## Step 2: Define required models and pipeline
Pipelines need to be defined in the configuration file to use them. The same configuration file is used to define served models and served pipelines.
-Use the [config.json located here](https://github.com/openvinotoolkit/model_server/blob/main/demos/model_ensemble/python/config.json), the content is as follows:
+Use the [config.json located here](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/model_ensemble/python/config.json), the content is as follows:
```bash
cat config.json
{
diff --git a/demos/multi_faces_analysis_pipeline/python/README.md b/demos/multi_faces_analysis_pipeline/python/README.md
index 3cbcdf6313..04db57d7a2 100644
--- a/demos/multi_faces_analysis_pipeline/python/README.md
+++ b/demos/multi_faces_analysis_pipeline/python/README.md
@@ -1,7 +1,7 @@
# Multi Faces Analysis Pipeline Demo {#ovms_demo_multi_faces_analysis_pipeline}

-This document demonstrates how to create complex pipelines using object detection and object recognition models from OpenVINO Model Zoo. As an example, we will use [face-detection-retail-0004](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/face-detection-retail-0004/README.md) to detect multiple faces on the image. Then, for each detected face we will crop it using [model_zoo_intel_object_detection](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection) example custom node. Finally, each image face image will be forwarded to [age-gender-recognition-retail-0013](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/age-gender-recognition-retail-0013/README.md) and [emotion-recognition-retail-0003](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/emotions-recognition-retail-0003/README.md) models.
+This document demonstrates how to create complex pipelines using object detection and object recognition models from OpenVINO Model Zoo. As an example, we will use [face-detection-retail-0004](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/face-detection-retail-0004/README.md) to detect multiple faces on the image. Then, for each detected face we will crop it using [model_zoo_intel_object_detection](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection) example custom node. 
Finally, each face image will be forwarded to [age-gender-recognition-retail-0013](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/age-gender-recognition-retail-0013/README.md) and [emotion-recognition-retail-0003](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/emotions-recognition-retail-0003/README.md) models.

![Multi Faces Analysis Graph](multi_faces_analysis.png)

@@ -20,7 +20,7 @@ Below is depicted graph implementing faces analysis pipeline execution.
It includes the following Nodes:
- Model `face-detection` - deep learning model which takes user image as input. Its outputs contain information about face coordinates and confidence levels.
- Custom node `model_zoo_intel_object_detection` - it includes C++ implementation of common object detection models results processing. By analysing the output it produces cropped face images based on the configurable score level threshold. Custom node also resizes them to the target resolution and combines them into a single output of a dynamic batch size. The output batch size is determined by the number of detected
-boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection).
+boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection).
- demultiplexer - outputs from the custom node model_zoo_intel_object_detection have variable batch size. In order to match it with the sequential recognition models, data is split into individual images with each batch size equal to 1. Such smaller requests can be submitted for inference in parallel to the next Model Nodes. Learn more about the [demultiplexing](../../../docs/demultiplexing.md).
- Model `age-gender-recognition` - this model recognizes age and gender on given face image
@@ -111,7 +111,7 @@ docker run -p 9000:9000 -d -v ${PWD}/workspace:/workspace openvino/model_server

## Requesting the Service

-Exemplary client [multi_faces_analysis_pipeline.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/multi_faces_analysis_pipeline/python/multi_faces_analysis_pipeline.py) can be used to request pipeline deployed in previous step.
+Exemplary client [multi_faces_analysis_pipeline.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/multi_faces_analysis_pipeline/python/multi_faces_analysis_pipeline.py) can be used to request the pipeline deployed in the previous step.

```bash
pip3 install -r requirements.txt
diff --git a/demos/optical_character_recognition/python/README.md b/demos/optical_character_recognition/python/README.md
index f3d7795b67..caf961b2a7 100644
--- a/demos/optical_character_recognition/python/README.md
+++ b/demos/optical_character_recognition/python/README.md
@@ -18,7 +18,7 @@ It includes the following nodes:
- Custom node east_ocr - it includes C++ implementation of east-resnet50 model results processing. It analyses the detected box coordinates, filters the results based on the configurable score level threshold and applies a non-max suppression algorithm to remove overlapping boxes. 
Finally, the custom node east_ocr crops all detected boxes from the original image, resizes them to the target resolution and combines them into a single output of a dynamic batch size. The output batch size is determined by the number of detected
-boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [east_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/east_ocr)
+boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [east_ocr custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/east_ocr)
- demultiplexer - output from the custom node east_ocr has a variable batch size. In order to match it with the sequential text recognition model, the data is split into individual images with batch size 1 each. Such smaller requests can be submitted for inference in parallel to the next Model Node. Learn more about the [demultiplexing](../../../docs/demultiplexing.md)
- Model text-recognition - this model recognizes characters included in the input image.
@@ -103,11 +103,11 @@ text-recognition model will have the following interface:

## Building the Custom Node "east_ocr" Library

-Custom nodes are loaded into OVMS as dynamic library implementing OVMS API from [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_node_interface.h).
+Custom nodes are loaded into OVMS as a dynamic library implementing the OVMS API from [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_node_interface.h).
It can use OpenCV libraries included in OVMS or other third party components.

The custom node east_ocr can be built inside a docker container via the following procedure:
-- go to the directory with custom node examples [src/custom_node](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_nodes)
+- go to the directory with custom node examples [src/custom_node](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_nodes)
- run `make` command:

```bash
@@ -131,7 +131,7 @@ cp -R EAST/IR/1 OCR/east_fp32/1

## OVMS Configuration File

-The configuration file for running the OCR demo is stored in [config.json](https://github.com/openvinotoolkit/model_server/blob/main/demos/optical_character_recognition/python/config.json)
+The configuration file for running the OCR demo is stored in [config.json](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/optical_character_recognition/python/config.json)
Copy this file along with the model files and the custom node library as presented below:
```bash
cp model_server/demos/optical_character_recognition/python/config.json OCR
diff --git a/demos/python_demos/clip_image_classification/README.md b/demos/python_demos/clip_image_classification/README.md
index 9ca8992dc1..e583248154 100644
--- a/demos/python_demos/clip_image_classification/README.md
+++ b/demos/python_demos/clip_image_classification/README.md
@@ -1,6 +1,6 @@
# CLIP image classification {#ovms_demo_clip_image_classification}

-Image classification demo using multi-modal CLIP model for inference and [Python code](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html) for pre and postprocessing. 
+Image classification demo using multi-modal CLIP model for inference and [Python code](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html) for pre- and postprocessing.
The client sends a request with an image and input labels to the graph and receives the label with the highest probability. The preprocessing Python node is executed first and prepares the inputs vector based on user inputs from the request. Then the inputs are used to get a similarity matrix from inference on the CLIP model. After that, the postprocessing Python node is executed; it extracts the label with the highest score among the input labels and sends it back to the user.
The demo is based on this [CLIP notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/228-clip-zero-shot-image-classification/228-clip-zero-shot-classification.ipynb)
diff --git a/demos/python_demos/rag_chatbot/README.md b/demos/python_demos/rag_chatbot/README.md
index c72234b8f0..a9e2ed3680 100644
--- a/demos/python_demos/rag_chatbot/README.md
+++ b/demos/python_demos/rag_chatbot/README.md
@@ -39,7 +39,7 @@ It will create an image called `openvino/model_server:py`

## OpenVINO Model Server deployment with online models pulling from Hugging Face Hub
In this demo, OpenVINO Model Server has an option to pull the required models from Hugging Face Hub.
-It's a simple deployment option because it doesn't require models preparation which have to be attached to the container. Just the demo scripts and configation files are required in the container at startup.
+It's a simple deployment option because it doesn't require preparing models that have to be attached to the container. Just the demo scripts and configuration files are required in the container at startup.

What needs to be prepared is a list of documents, which should give the context for RAG analysis. It is provided as a text file containing URLs of the document sources:
diff --git a/demos/real_time_stream_analysis/python/README.md b/demos/real_time_stream_analysis/python/README.md
index ebba4a892a..23f8ab4c08 100644
--- a/demos/real_time_stream_analysis/python/README.md
+++ b/demos/real_time_stream_analysis/python/README.md
@@ -30,7 +30,7 @@ In the demo will be used two gRPC communication patterns which might be advantag

## gRPC streaming with MediaPipe graphs
gRPC stream connection is allowed for served [MediaPipe graphs](../../../docs/mediapipe.md). It allows sending asynchronous calls to the endpoint, all linked in a single session context. Responses are sent back via a stream and processed in the callback function.
-The helper class [StreamClient](https://github.com/openvinotoolkit/model_server/blob/main/demos/common/stream_client/stream_client.py) provides a mechanism for flow control and tracking the sequence of the requests and responses. In the StreamClient initialization the streaming mode is set via the parameter `streaming_api=True`.
+The helper class [StreamClient](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/common/stream_client/stream_client.py) provides a mechanism for flow control and tracking the sequence of the requests and responses. In the StreamClient initialization, the streaming mode is set via the parameter `streaming_api=True`.
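For orientation, the general shape of such a streaming session is sketched below with the `tritonclient` package, which the KServe samples in this repository use; the model name (`holistic`), input tensor name (`input`), and synthetic frames are placeholders, and the demo's own StreamClient wraps a comparable flow rather than this exact code.

```python
# Generic gRPC streaming sketch: one shared stream, asynchronous requests,
# responses handled in a callback.
import numpy as np
import tritonclient.grpc as grpcclient

def callback(result, error):
    # Responses arrive asynchronously on the shared stream.
    print(error if error else result.get_response().id)

client = grpcclient.InferenceServerClient("localhost:9000")
client.start_stream(callback=callback)
for _ in range(10):
    frame = np.zeros((1, 480, 640, 3), dtype=np.uint8)  # stand-in for a camera frame
    infer_input = grpcclient.InferInput("input", list(frame.shape), "UINT8")
    infer_input.set_data_from_numpy(frame)
    client.async_stream_infer(model_name="holistic", inputs=[infer_input])
client.stop_stream()
```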
Using the streaming API has the following advantages:
- good performance thanks to asynchronous calls and sharing the graph execution for multiple calls
@@ -39,7 +39,7 @@ Using the streaming API has the following advantages:

### Preparing the model server for gRPC streaming with a Holistic graph

-The [holistic graph](https://github.com/openvinotoolkit/model_server/blob/main/demos/mediapipe/holistic_tracking/holistic_tracking.pbtxt) is expecting and IMAGE object on the input and returns an IMAGE on the output.
+The [holistic graph](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/mediapipe/holistic_tracking/holistic_tracking.pbtxt) is expecting an IMAGE object on the input and returns an IMAGE on the output.
As such it doesn't require any preprocessing and postprocessing. In this demo the returned stream will be just visualized or sent to the target sink.
The model server with the holistic use case can be deployed with the following steps:
diff --git a/demos/single_face_analysis_pipeline/python/README.md b/demos/single_face_analysis_pipeline/python/README.md
index c6f81e8541..bbe6a915b5 100644
--- a/demos/single_face_analysis_pipeline/python/README.md
+++ b/demos/single_face_analysis_pipeline/python/README.md
@@ -52,7 +52,7 @@ docker run -p 9000:9000 -d -v ${PWD}/workspace:/workspace openvino/model_server

## Requesting the Service

-Exemplary client [single_face_analysis_pipeline.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/single_face_analysis_pipeline/python/single_face_analysis_pipeline.py) can be used to request pipeline deployed in previous step.
+Exemplary client [single_face_analysis_pipeline.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/single_face_analysis_pipeline/python/single_face_analysis_pipeline.py) can be used to request the pipeline deployed in the previous step.

```bash
pip3 install -r requirements.txt
diff --git a/demos/using_onnx_model/python/README.md b/demos/using_onnx_model/python/README.md
index 19a167b255..ebd5214056 100644
--- a/demos/using_onnx_model/python/README.md
+++ b/demos/using_onnx_model/python/README.md
@@ -77,7 +77,7 @@ docker run -d -u $(id -u):$(id -g) -v $(pwd)/workspace:/workspace -p 9001:9001 o
--config_path /workspace/config.json --port 9001
```

-The `onnx_model_demo.py` script can run inference both with and without performing preprocessing. Since in this variant preprocessing is done by the model server (via custom node), there's no need to perform any image preprocessing on the client side. In that case, run without `--run_preprocessing` option. See [preprocessing function](https://github.com/openvinotoolkit/model_server/blob/main/demos/using_onnx_model/python/onnx_model_demo.py#L26-L33) run in the client.
+The `onnx_model_demo.py` script can run inference both with and without performing preprocessing. Since in this variant preprocessing is done by the model server (via custom node), there's no need to perform any image preprocessing on the client side. In that case, run without `--run_preprocessing` option. See [preprocessing function](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/using_onnx_model/python/onnx_model_demo.py#L26-L33) run in the client.

Run the client without preprocessing:
```bash
@@ -88,7 +88,7 @@ Detected class name: bee
```

## Node parameters explanation
-Additional preprocessing step applies a division and an subtraction to each pixel value in the image. 
This calculation is configured by passing two parameters to _image transformation_ custom node in [config.json](https://github.com/openvinotoolkit/model_server/blob/main/demos/using_onnx_model/python/config.json#L32-L33):
+Additional preprocessing step applies a division and a subtraction to each pixel value in the image. This calculation is configured by passing two parameters to _image transformation_ custom node in [config.json](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/using_onnx_model/python/config.json#L32-L33):
```
"params": {
...
diff --git a/demos/vehicle_analysis_pipeline/python/README.md b/demos/vehicle_analysis_pipeline/python/README.md
index ab1b21464c..d4a4c1d5d3 100644
--- a/demos/vehicle_analysis_pipeline/python/README.md
+++ b/demos/vehicle_analysis_pipeline/python/README.md
@@ -1,5 +1,5 @@
# Vehicle Analysis Pipeline Demo {#ovms_demo_vehicle_analysis_pipeline}
-This document demonstrates how to create complex pipelines using object detection and object recognition models from OpenVINO Model Zoo. As an example, we will use [vehicle-detection-0202](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/vehicle-detection-0202/README.md) to detect multiple vehicles on the image. Then, for each detected vehicle we will crop it using [model_zoo_intel_object_detection](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection) example custom node. Finally, each vehicle image will be forwarded to [vehicle-attributes-recognition-barrier-0042](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/vehicle-attributes-recognition-barrier-0042/README.md) model.
+This document demonstrates how to create complex pipelines using object detection and object recognition models from OpenVINO Model Zoo. As an example, we will use [vehicle-detection-0202](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/vehicle-detection-0202/README.md) to detect multiple vehicles on the image. Then, for each detected vehicle we will crop it using [model_zoo_intel_object_detection](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection) example custom node. Finally, each vehicle image will be forwarded to [vehicle-attributes-recognition-barrier-0042](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/vehicle-attributes-recognition-barrier-0042/README.md) model.

![Vehicles analysis visualization](vehicles_analysis.png)

@@ -14,7 +14,7 @@ Below is depicted graph implementing vehicles analysis pipeline execution.
It includes the following Nodes:
- Model `vehicle_detection` - deep learning model which takes user image as input. Its outputs contain information about vehicle coordinates and confidence levels.
- Custom node `model_zoo_intel_object_detection` - it includes C++ implementation of common object detection models results processing. By analysing the output it produces cropped vehicle images based on the configurable score level threshold. Custom node also resizes them to the target resolution and combines them into a single output of a dynamic batch size. The output batch size is determined by the number of detected
-boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. 
Learn more about the [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection). +boxes according to the configured criteria. All operations on the images employ OpenCV libraries which are preinstalled in the OVMS. Learn more about the [model_zoo_intel_object_detection custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection). - demultiplexer - outputs from the custom node model_zoo_intel_object_detection have variable batch size. In order to match it with the sequential recognition models, data is split into individual images with each batch size equal to 1. Such smaller requests can be submitted for inference in parallel to the next Model Nodes. Learn more about the [demultiplexing](../../../docs/demultiplexing.md). - Model `vehicle_attributes_recognition` - this model recognizes type and color for given vehicle image diff --git a/docs/accelerators.md b/docs/accelerators.md index f1f39ec5e8..c5e3a18c43 100644 --- a/docs/accelerators.md +++ b/docs/accelerators.md @@ -50,7 +50,7 @@ docker run --rm -it --device=/dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl -u $(i > **NOTE**: > The public docker image includes the OpenCL drivers for GPU in version 22.28 (RedHat) and 22.35 (Ubuntu). -If you need to build the OpenVINO Model Server with different driver version, refer to the [building from sources](https://github.com/openvinotoolkit/model_server/blob/main/docs/build_from_source.md) +If you need to build the OpenVINO Model Server with different driver version, refer to the [building from sources](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/build_from_source.md) ## Using Multi-Device Plugin @@ -173,7 +173,7 @@ cd model_server make docker_build NVIDIA=1 OV_USE_BINARY=0 cd .. ``` -Check also [building from sources](https://github.com/openvinotoolkit/model_server/blob/main/docs/build_from_source.md). +Check also [building from sources](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/build_from_source.md). Example command to run container with NVIDIA support: diff --git a/docs/binary_input_kfs.md b/docs/binary_input_kfs.md index 94339f39bc..ad9cae8da3 100644 --- a/docs/binary_input_kfs.md +++ b/docs/binary_input_kfs.md @@ -23,7 +23,7 @@ KServe API also allows sending encoded images via HTTP interface to the model or For binary inputs, the `parameters` map in the JSON part contains `binary_data_size` field for each binary input that indicates the size of the data on the input. Since there's no strict limitations on image resolution and format (as long as it can be loaded by OpenCV), images might be of different sizes. To send a batch of images you need to precede data of every batch by 4 bytes(little endian) containing size of this batch and specify their combined size in `binary_data_size`. 
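This packing can be reproduced in a few lines of Python. Below is a minimal sketch, assuming three already encoded JPEG files whose paths are placeholders:

```python
import struct

# Hypothetical paths to images already encoded as JPEG.
paths = ["image1.jpeg", "image2.jpeg", "image3.jpeg"]
images = [open(path, "rb").read() for path in paths]

# Precede every image with its size as 4 bytes (little endian).
buffer = b"".join(struct.pack("<I", len(img)) + img for img in images)

# Combined size of the images, passed as the `binary_data_size` parameter.
binary_data_size = sum(len(img) for img in images)
```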
For example, if batch would contain three images of sizes 370, 480, 500 bytes the content of input buffer inside binary extension would look like this: <0x72010000 (=370)><370 bytes of first image><0xE0010000 (=480)><480 bytes of second image> <0xF4010000 (=500)><500 bytes of third image> And in that case binary_data_size would be 1350(370 + 480 + 500) -Function set_data_from_numpy in triton client lib that we use in our [REST sample](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/http_infer_binary_resnet.py) automatically converts given images to this format. +The `set_data_from_numpy` function in the Triton client library that we use in our [REST sample](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/kserve-api/samples/http_infer_binary_resnet.py) automatically converts given images to this format. If the request contains only one input `binary_data_size` parameter can be omitted - in this case whole buffer is treated as a input image. @@ -48,8 +48,8 @@ For the Raw Data binary inputs `binary_data_size` parameter can be omitted since ## Usage examples -Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/grpc_infer_binary_resnet.py)) -Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/main/client/python/kserve-api/samples/README.md)) +Sample clients that use binary inputs via KFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/kserve-api/samples/http_infer_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/kserve-api/samples/grpc_infer_binary_resnet.py)) +Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/kserve-api/samples/README.md)) ## Recommendations: diff --git a/docs/binary_input_tfs.md b/docs/binary_input_tfs.md index 285f0a376e..dd31a89410 100644 --- a/docs/binary_input_tfs.md +++ b/docs/binary_input_tfs.md @@ -28,8 +28,8 @@ On the server side, the Base64 encoded data is decoded to raw binary and loaded ## Usage examples -Sample clients that use binary inputs via TFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/samples/http_predict_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/samples/grpc_predict_binary_resnet.py)) -Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/main/client/python/ovmsclient/samples/README.md)) +Sample clients that use binary inputs via TFS API can be found here ([REST sample](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/samples/http_predict_binary_resnet.py))/([GRPC sample](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/samples/grpc_predict_binary_resnet.py)) +Also, see the ([README](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/ovmsclient/samples/README.md)) ## Recommendations: diff --git a/docs/build_from_source.md b/docs/build_from_source.md index fc5cadc745..4b89bc78e9 100644 --- a/docs/build_from_source.md +++
b/docs/build_from_source.md @@ -196,4 +196,4 @@ dist/ubuntu ``` -Read more detailed usage in [developer guide](https://github.com/openvinotoolkit/model_server/blob/main/docs/developer_guide.md). \ No newline at end of file +Read more detailed usage in [developer guide](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/developer_guide.md). \ No newline at end of file diff --git a/docs/clients_kfs.md b/docs/clients_kfs.md index bd4fc6e5cc..2ca2a10a3c 100644 --- a/docs/clients_kfs.md +++ b/docs/clients_kfs.md @@ -8,7 +8,7 @@ hidden: gRPC API RESTful API -Examples +Examples ``` ## Python Client @@ -821,4 +821,4 @@ client.stop_stream() ::: :::: -For complete usage examples see [Kserve samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples). +For complete usage examples see [Kserve samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples). diff --git a/docs/clients_tfs.md b/docs/clients_tfs.md index 73d42c0e96..52b8ac8541 100644 --- a/docs/clients_tfs.md +++ b/docs/clients_tfs.md @@ -8,7 +8,7 @@ hidden: gRPC API RESTful API -Examples +Examples ``` ## Python Client @@ -329,7 +329,7 @@ curl -X POST http://localhost:8000/v1/models/my_model:predict :::: -For complete usage examples see [ovmsclient samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/ovmsclient/samples). +For complete usage examples see [ovmsclient samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/ovmsclient/samples). ## C++ and Go Clients diff --git a/docs/custom_model_loader.md b/docs/custom_model_loader.md index b6213a5746..a8e08b5c36 100644 --- a/docs/custom_model_loader.md +++ b/docs/custom_model_loader.md @@ -35,7 +35,7 @@ To enable a particular model to load using custom loader, add extra parameter in ### C++ API Interface for custom loader: -A base class **CustomLoaderInterface** along with interface API is defined in [src/customloaderinterface.hpp](https://github.com/openvinotoolkit/model_server/blob/main/src/customloaderinterface.hpp) +A base class **CustomLoaderInterface** along with interface API is defined in [src/customloaderinterface.hpp](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/customloaderinterface.hpp) Refer to this file for API details. @@ -45,7 +45,7 @@ Derive the new custom loader class from base class **CustomLoaderInterface** and **CustomLoaderInterface* createCustomLoader** which allocates the new custom loader and returns a pointer to the base class. -An example custom loader which reads files and returns required buffers to be loaded is implemented and provided as reference in **[src/example/SampleCustomLoader](https://github.com/openvinotoolkit/model_server/blob/main/src/example/SampleCustomLoader)** +An example custom loader which reads files and returns required buffers to be loaded is implemented and provided as reference in **[src/example/SampleCustomLoader](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/example/SampleCustomLoader)** This custom loader is built with the model server build and available in the docker *openvino/model_server-build:latest*. The shared library can be either copied from this docker or built using makefile. An example Makefile is provided as a reference in the directory. 
@@ -89,7 +89,7 @@ chmod -R 755 ./model Step 4: Download the required Client Components ```bash -curl --fail https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/python/client_utils.py -o client_utils.py https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/face_detection/python/face_detection.py -o face_detection.py https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/python/requirements.txt -o requirements.txt +curl --fail https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/python/client_utils.py -o client_utils.py https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/face_detection/python/face_detection.py -o face_detection.py https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/python/requirements.txt -o requirements.txt pip3 install --upgrade pip pip3 install -r requirements.txt @@ -99,7 +99,7 @@ pip3 install -r requirements.txt Step 5: Download Data for Inference ```bash -curl --fail --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/people/people1.jpeg -o images/people1.jpeg +curl --fail --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/people/people1.jpeg -o images/people1.jpeg ``` Step 6: Prepare the config json. diff --git a/docs/custom_node_development.md b/docs/custom_node_development.md index 74144484e2..d70b04185e 100644 --- a/docs/custom_node_development.md +++ b/docs/custom_node_development.md @@ -11,7 +11,7 @@ developed in C++ or C to perform arbitrary data transformations. ## Custom Node API -The custom node library must implement the API interface defined in [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_node_interface.h). +The custom node library must implement the API interface defined in [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_node_interface.h). The interface is defined in `C` to simplify compatibility with various compilers. The library could use third party components linked statically or dynamically. OpenCV is a built in component in OVMS which could be used to perform manipulation on the image data. @@ -67,7 +67,7 @@ Note that during the function execution all the output data buffers need to be a the request processing is completed and returned to the user. The cleanup is triggered by calling the `release` function which also needs to be implemented in the custom library. -In some cases, dynamic allocation in `execute` call might be a performance bottleneck or cause memory fragmentation. Starting from 2022.1 release, it is possible to preallocate memory during DAG initialization and reuse it in subsequent inference requests. Refer to `initialize` and `deinitialize` functions below. Those can be used to implement preallocated memory pool. Example implementation can be seen in [custom node example source](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_nodes/add_one/add_one.cpp#L141). +In some cases, dynamic allocation in `execute` call might be a performance bottleneck or cause memory fragmentation. Starting from 2022.1 release, it is possible to preallocate memory during DAG initialization and reuse it in subsequent inference requests. Refer to `initialize` and `deinitialize` functions below. 
Those can be used to implement a preallocated memory pool. An example implementation can be seen in [custom node example source](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_nodes/add_one/add_one.cpp#L141). Execute function returns an integer value that defines the success (`0` value) or failure (other than 0). When the function reports error, the pipeline execution is stopped and the error is returned to the user. @@ -147,7 +147,7 @@ would be converted to ["String_123", "", "zebra"]. ## Building Custom node library can be compiled using any tool. It is recommended to follow the example based -a docker container with all build dependencies included. It is described in this [Makefile](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_nodes/Makefile). +on a docker container with all build dependencies included. It is described in this [Makefile](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_nodes/Makefile). ## Testing The recommended method for testing the custom library is via OVMS execution: @@ -157,7 +157,7 @@ The recommended method for testing the custom library is via OVMS execution: - Submit a request to OVMS endpoint using a gRPC or REST client. - Analyse the logs on the OVMS server. -For debugging steps, refer to the OVMS [developer guide](https://github.com/openvinotoolkit/model_server/blob/main/docs/developer_guide.md) +For debugging steps, refer to the OVMS [developer guide](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/developer_guide.md) ## Built-in custom nodes @@ -167,16 +167,16 @@ Below you can see the list of fully functional custom nodes embedded in the mode | Custom Node | Location in the container | | :--- | :---- | -| [east-resnet50 OCR custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/east_ocr) | `/ovms/lib/custom_nodes/libcustom_node_east_ocr.so`| -| [horizontal OCR custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/horizontal_ocr) | `/ovms/lib/custom_nodes/libcustom_node_horizontal_ocr.so`| -| [model zoo intel object detection custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/model_zoo_intel_object_detection) | `/ovms/lib/custom_nodes/libcustom_node_model_zoo_intel_object_detection.so`| -| [image transformation custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/image_transformation) | `/ovms/lib/custom_nodes/libcustom_node_image_transformation.so`| -| [add one custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/add_one) | `/ovms/lib/custom_nodes/libcustom_node_add_one.so`| -| [face blur custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/face_blur) | `/ovms/lib/custom_nodes/libcustom_node_face_blur.so`| +| [east-resnet50 OCR custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/east_ocr) | `/ovms/lib/custom_nodes/libcustom_node_east_ocr.so`| +| [horizontal OCR custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/horizontal_ocr) | `/ovms/lib/custom_nodes/libcustom_node_horizontal_ocr.so`| +| [model zoo intel object detection custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/model_zoo_intel_object_detection) | `/ovms/lib/custom_nodes/libcustom_node_model_zoo_intel_object_detection.so`| +| [image
transformation custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/image_transformation) | `/ovms/lib/custom_nodes/libcustom_node_image_transformation.so`| +| [add one custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/add_one) | `/ovms/lib/custom_nodes/libcustom_node_add_one.so`| +| [face blur custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/face_blur) | `/ovms/lib/custom_nodes/libcustom_node_face_blur.so`| **Example:** -Including built-in [horizontal OCR custom node](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/horizontal_ocr) in the `config.json` would look like: +Including built-in [horizontal OCR custom node](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/horizontal_ocr) in the `config.json` would look like: ```json ... "custom_node_library_config_list": [ @@ -191,8 +191,8 @@ Including built-in [horizontal OCR custom node](https://github.com/openvinotoolk The custom node is already available under this path. No need to build anything and mounting to the container. Additional examples are included in the unit tests: -- [node_add_sub.c](https://github.com/openvinotoolkit/model_server/tree/main/src/test/custom_nodes/node_add_sub.c) -- [node_choose_maximum.cpp](https://github.com/openvinotoolkit/model_server/tree/main/src/test/custom_nodes/node_choose_maximum.cpp) -- [node_missing_implementation.c](https://github.com/openvinotoolkit/model_server/tree/main/src/test/custom_nodes/node_missing_implementation.c) -- [node_perform_different_operations.cpp](https://github.com/openvinotoolkit/model_server/tree/main/src/test/custom_nodes/node_perform_different_operations.cpp) +- [node_add_sub.c](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/test/custom_nodes/node_add_sub.c) +- [node_choose_maximum.cpp](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/test/custom_nodes/node_choose_maximum.cpp) +- [node_missing_implementation.c](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/test/custom_nodes/node_missing_implementation.c) +- [node_perform_different_operations.cpp](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/test/custom_nodes/node_perform_different_operations.cpp) diff --git a/docs/dag_scheduler.md b/docs/dag_scheduler.md index 14a888268a..c6c992dd79 100644 --- a/docs/dag_scheduler.md +++ b/docs/dag_scheduler.md @@ -44,7 +44,7 @@ There are two special kinds of nodes - Request and Response node. Both of them a ### Custom node type * custom - that node can be used to implement all operations on the data which can not be handled by the neural network model. It is represented by -a C++ dynamic library implementing OVMS API defined in [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/blob/main/src/custom_node_interface.h). Custom nodes can run the data +a C++ dynamic library implementing OVMS API defined in [custom_node_interface.h](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/custom_node_interface.h). Custom nodes can run the data processing using OpenCV, which is included in OVMS, or include other third-party components. Custom node libraries are loaded into OVMS by adding their definition to the pipeline configuration. The configuration includes a path to the compiled binary with the `.so` extension. 
Custom nodes are not versioned, meaning one custom node library is bound to one name. To load another version, another name needs to be used. diff --git a/docs/deploying_server.md b/docs/deploying_server.md index 3d0933c587..63554197c8 100644 --- a/docs/deploying_server.md +++ b/docs/deploying_server.md @@ -47,8 +47,8 @@ docker run -u $(id -u) -v $(pwd)/models:/models -p 9000:9000 openvino/model_serv ##### 2.2 Download input files: an image and a label mapping file ```bash -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/zebra.jpeg -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/python/classes.py +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/zebra.jpeg +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/python/classes.py ``` ##### 2.3 Install the Python-based ovmsclient package diff --git a/docs/dynamic_bs_auto_reload.md b/docs/dynamic_bs_auto_reload.md index 45d8c6298c..3556d9e94a 100644 --- a/docs/dynamic_bs_auto_reload.md +++ b/docs/dynamic_bs_auto_reload.md @@ -7,7 +7,7 @@ This guide shows how to configure a model to accept input data with different ba Enabling dynamic batch size via model reload is as simple as setting the `batch_size` parameter to `auto`. To configure and use the dynamic batch size, take advantage of: -- An example client in Python [grpc_predict_resnet.py](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/grpc_predict_resnet.py) that can be used to request inference with the desired batch size. +- An example client in Python [grpc_predict_resnet.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/grpc_predict_resnet.py) that can be used to request inference with the desired batch size. - A sample [resnet](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/resnet50-binary-0001/README.md) model. diff --git a/docs/dynamic_bs_demultiplexer.md b/docs/dynamic_bs_demultiplexer.md index d00a9754f5..435c808fa1 100644 --- a/docs/dynamic_bs_demultiplexer.md +++ b/docs/dynamic_bs_demultiplexer.md @@ -9,7 +9,7 @@ More information about this feature can be found in [dynamic batch size in demul > **NOTE**: Only one dynamic demultiplexer (`demultiply_count` with value `-1`) can exist in the pipeline. -- Example client in python [grpc_predict_resnet.py](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/grpc_predict_resnet.py) can be used to request the pipeline. Use `--dag-batch-size-auto` flag to add an additional dimension to the input shape which is required for demultiplexing feature. +- Example client in python [grpc_predict_resnet.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/grpc_predict_resnet.py) can be used to request the pipeline. Use `--dag-batch-size-auto` flag to add an additional dimension to the input shape which is required for demultiplexing feature. - The example uses model [resnet](https://github.com/openvinotoolkit/open_model_zoo/blob/2022.1.0/models/intel/resnet50-binary-0001/README.md). 
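As a client-side illustration of the reload behavior, here is a minimal sketch using the `ovmsclient` package; the port 9000, model name `resnet` and input name `0` are placeholders that depend on your deployment:

```python
import numpy as np
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")

# With batch_size set to "auto", the server reloads the model
# whenever the requested batch size changes.
for batch_size in (1, 4):
    batch = np.zeros((batch_size, 3, 224, 224), dtype=np.float32)
    output = client.predict(inputs={"0": batch}, model_name="resnet")
    print(batch_size, output.shape)
```

Keep in mind that each reload adds latency, so for workloads with frequently changing batch sizes the demultiplexer variant described above may be a better fit.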
diff --git a/docs/dynamic_input.md b/docs/dynamic_input.md index f717683dea..8e9c1225ed 100644 --- a/docs/dynamic_input.md +++ b/docs/dynamic_input.md @@ -43,5 +43,5 @@ OpenVINO Model Server accepts several data types that can be handled on [MediaPi - Next node in the graph uses a calculator that can decode raw KServe request. In such case dynamic input handling must be implemented as part of the calculator logic since model server passes the request to the calculator as-is. Such node expects input stream with a tag starting with `REQUEST` prefix. -- Next node in the graph uses `PythonExecutorCalculator`. In such case data in the KServe request will be available to the user as input argument of their Python [execute function](https://github.com/openvinotoolkit/model_server/blob/main/docs/python_support/reference.md#ovmspythonmodel-class). Such node expects input stream with a tag starting with `OVMS_PY_TENSOR` prefix. +- Next node in the graph uses `PythonExecutorCalculator`. In such case data in the KServe request will be available to the user as input argument of their Python [execute function](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/python_support/reference.md#ovmspythonmodel-class). Such node expects input stream with a tag starting with `OVMS_PY_TENSOR` prefix. diff --git a/docs/dynamic_shape_auto_reload.md b/docs/dynamic_shape_auto_reload.md index 552e000ec6..bab5faaa07 100644 --- a/docs/dynamic_shape_auto_reload.md +++ b/docs/dynamic_shape_auto_reload.md @@ -7,7 +7,7 @@ This guide explains how to configure a model to accept input data in different s Enable dynamic shape via model reloading by setting the `shape` parameter to `auto`. To configure and use the dynamic batch size, take advantage of: -- Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_detection/python/face_detection.py) that can be used to request inference with the desired input shape. +- Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_detection/python/face_detection.py) that can be used to request inference with the desired input shape. - An example [face_detection_retail_0004](https://docs.openvinotoolkit.org/2021.4/omz_models_model_face_detection_retail_0004.html) model. diff --git a/docs/dynamic_shape_binary_inputs.md b/docs/dynamic_shape_binary_inputs.md index d8618a98f8..02c1ecde5a 100644 --- a/docs/dynamic_shape_binary_inputs.md +++ b/docs/dynamic_shape_binary_inputs.md @@ -36,9 +36,9 @@ pip3 install ovmsclient ### Download a Sample Image and Label Mappings ```bash -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/zebra.jpeg +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/zebra.jpeg -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/python/classes.py +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/python/classes.py ``` ### Run Inference diff --git a/docs/dynamic_shape_custom_node.md b/docs/dynamic_shape_custom_node.md index 05b25d71b3..4ae8c727e5 100644 --- a/docs/dynamic_shape_custom_node.md +++ b/docs/dynamic_shape_custom_node.md @@ -3,12 +3,12 @@ ## Introduction This guide shows how to configure a simple Directed Acyclic Graph (DAG) with a custom node that performs input resizing before passing input data to the model. 
-The node below is provided as a demonstration. See instructions for how to build and use the custom node: [Image Transformation](https://github.com/openvinotoolkit/model_server/tree/main/src/custom_nodes/image_transformation). +The node below is provided as a demonstration. See instructions for how to build and use the custom node: [Image Transformation](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/custom_nodes/image_transformation). To run inference with this setup, we will use the following: -Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_detection/python/face_detection.py) that can be used to request inference on with the desired input shape. +Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_detection/python/face_detection.py) that can be used to request inference with the desired input shape. - An example [face_detection_retail_0004](https://docs.openvinotoolkit.org/2021.4/omz_models_model_face_detection_retail_0004.html) model. diff --git a/docs/dynamic_shape_dynamic_model.md b/docs/dynamic_shape_dynamic_model.md index f98506ac98..d7e92a3261 100644 --- a/docs/dynamic_shape_dynamic_model.md +++ b/docs/dynamic_shape_dynamic_model.md @@ -14,7 +14,7 @@ Another option to use dynamic shape feature is to export the model with dynamic To the demonstrate dynamic dimensions, take advantage of: -Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_detection/python/face_detection.py) that can be used to request inference with the desired input shape. +Example client in Python [face_detection.py](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_detection/python/face_detection.py) that can be used to request inference with the desired input shape. - An example [face_detection_retail_0004](https://docs.openvinotoolkit.org/2021.4/omz_models_model_face_detection_retail_0004.html) model. diff --git a/docs/llm/reference.md b/docs/llm/reference.md index 797b60a7dc..91f413a39e 100644 --- a/docs/llm/reference.md +++ b/docs/llm/reference.md @@ -15,7 +15,7 @@ It is now integrated into OpenVINO Model Server providing efficient way to run g Check out the [quickstart guide](quickstart.md) for a simple example that shows how to use this feature. ## LLM Calculator -As you can see in the quickstart above, big part of the configuration resides in `graph.pbtxt` file. That's because model server text generation servables are deployed as MediaPipe graphs with dedicated LLM calculator that works with latest [OpenVINO GenAI](https://github.com/ilya-lavrenov/openvino.genai/tree/ct-beam-search/text_generation/causal_lm/cpp/continuous_batching/library) solutions. The calculator is designed to run in cycles and return the chunks of reponses to the client. +As you can see in the quickstart above, a big part of the configuration resides in the `graph.pbtxt` file. That's because model server text generation servables are deployed as MediaPipe graphs with dedicated LLM calculator that works with latest [OpenVINO GenAI](https://github.com/ilya-lavrenov/openvino.genai/tree/ct-beam-search/text_generation/causal_lm/cpp/continuous_batching/library) solutions. The calculator is designed to run in cycles and return the chunks of responses to the client.
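On the client side, those chunks can be consumed as server-sent events from the OpenAI-compatible endpoint. A minimal sketch with the `requests` package, assuming a servable named `llama` exposed on port 8000 (both are placeholders):

```python
import json
import requests

payload = {
    "model": "llama",
    "stream": True,
    "messages": [{"role": "user", "content": "Say hello"}],
}
with requests.post("http://localhost:8000/v3/chat/completions",
                   json=payload, stream=True) as response:
    for line in response.iter_lines():
        # Every chunk is a server-sent event prefixed with "data: ".
        if not line.startswith(b"data: "):
            continue
        chunk = line[len(b"data: "):]
        if chunk == b"[DONE]":
            break
        delta = json.loads(chunk)["choices"][0].get("delta", {})
        print(delta.get("content") or "", end="", flush=True)
```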
On the input it expects a HttpPayload struct passed by the Model Server frontend: ```cpp @@ -26,7 +26,7 @@ struct HttpPayload { rapidjson::Document* parsedJson; // pre-parsed body = null }; ``` -The input json content should be compatible with the [chat completions](./model_server_rest_api_chat.md) or [completions](./model_server_rest_api_completions.md) API. +The input json content should be compatible with the [chat completions](../model_server_rest_api_chat.md) or [completions](../model_server_rest_api_completions.md) API. The input also includes a side packet with a reference to `LLM_NODE_RESOURCES` which is a shared object representing an LLM engine. It loads the model, runs the generation cycles and reports the generated results to the LLM calculator via a generation handler. @@ -83,7 +83,7 @@ The calculator supports the following `node_options` for tuning the pipeline con - `optional uint64 max_num_seqs` - max number of sequences actively processed by the engine [default = 256]; - `optional bool dynamic_split_fuse` - use Dynamic Split Fuse token scheduling [default = true]; - `optional string device` - device to load models to. Supported values: "CPU" [default = "CPU"] -- `optional string plugin_config` - [OpenVINO device plugin configuration](https://docs.openvino.ai/2024/openvino-workflow/running-inference/inference-devices-and-modes.html). Should be provided in the same format for regular [models configuration](./parameters.md#model-configuration-options) [default = ""] +- `optional string plugin_config` - [OpenVINO device plugin configuration](https://docs.openvino.ai/2024/openvino-workflow/running-inference/inference-devices-and-modes.html). Should be provided in the same format for regular [models configuration](../parameters.md#model-configuration-options) [default = ""] The value of `cache_size` might have performance implications. It is used for storing LLM model KV cache data. Adjust it based on your environment capabilities, model size and expected level of concurrency. diff --git a/docs/mediapipe.md b/docs/mediapipe.md index 0d79686fa4..29e6064ccf 100644 --- a/docs/mediapipe.md +++ b/docs/mediapipe.md @@ -229,8 +229,8 @@ the version parameter is ignored. MediaPipe graphs are not versioned. Though, th MediaPipe graphs can include only the calculators built-in the model server image. If you want to add your own mediapipe calculator to OpenVINO Model Server functionality you need to add it as a dependency and rebuild the OpenVINO Model Server binary. -If you have it in external repository, you need to add the http_archive() definition or git_repository() definition to the bazel [WORKSPACE](https://github.com/openvinotoolkit/model_server/blob/main/WORKSPACE) file. -Then you need to add the calculator target as a bazel dependency to the [src/BUILD](https://github.com/openvinotoolkit/model_server/blob/main/src/BUILD) file. This should be done for: +If you have it in external repository, you need to add the http_archive() definition or git_repository() definition to the bazel [WORKSPACE](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/WORKSPACE) file. +Then you need to add the calculator target as a bazel dependency to the [src/BUILD](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/BUILD) file. 
This should be done for: ``` cc_library( diff --git a/docs/mediapipe_conversion.md b/docs/mediapipe_conversion.md index 4ff0cee926..9ec240ae41 100644 --- a/docs/mediapipe_conversion.md +++ b/docs/mediapipe_conversion.md @@ -190,7 +190,7 @@ input_order_list: ["Identity","Identity_1","Identity_2","Identity_3"] ### 3. Adjust graph input/output streams This step is required if you plan to deploy the graph in OpenVINO Model Server and existing graph does not have supported input/output packet types. Check for supported input and output packet types [here](./mediapipe.md). -In that cases you may need to add converter calculators as it was done [here](https://github.com/openvinotoolkit/model_server/blob/main/demos/mediapipe/object_detection/graph.pbtxt#L31). +In such cases you may need to add converter calculators, as was done [here](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/mediapipe/object_detection/graph.pbtxt#L31). ### 4. Set the config.json file path in the session calculator diff --git a/docs/metrics.md b/docs/metrics.md index 0995b40167..73fa7af0dd 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -201,7 +201,7 @@ To use data from metrics endpoint you can use the curl command: ```bash curl http://localhost:8000/metrics ``` -[Example metrics output](https://raw.githubusercontent.com/openvinotoolkit/model_server/main/docs/metrics_output.out) +[Example metrics output](https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/docs/metrics_output.out) ## Performance considerations Collecting metrics has negligible performance overhead when used with models of average size and complexity. However when used with very lightweight, fast models which inference time is very short, the metric incrementation can take noticeable proportion of the processing time. Consider it while enabling metrics for such models. @@ -230,7 +230,7 @@ For [MediaPipe Graphs](./mediapipe.md) metrics endpoint is not supported. With server metrics being scraped by [Prometheus](https://prometheus.io/) it is possible to integrate [Grafana](https://grafana.com/) to visualize them on the dashboards. Once you have Grafana configured with Prometheus as a data source, you can create your own dashboard or import one. -In OpenVINO Model Server repository you can find [grafana_dashboard.json](https://github.com/openvinotoolkit/model_server/blob/main/extras/grafana_dashboard.json) file that can be used to visualize per model metrics like: +In the OpenVINO Model Server repository you can find the [grafana_dashboard.json](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/extras/grafana_dashboard.json) file that can be used to visualize per model metrics like: - Throughput [RPS] - number of requests being processed by the model per second. - Mean Latency [ms] - latency averaged across all requests processed by the model in a certain timeframe. - Latency Quantile [ms] - value of latency for quantiles [0.75, 0.90, 0.99], meaning the latency that has NOT been exceeded by 75%, 90% and 99% of the requests. diff --git a/docs/model_server_c_api.md b/docs/model_server_c_api.md index 1b433f5c3c..42ac47c194 100644 --- a/docs/model_server_c_api.md +++ b/docs/model_server_c_api.md @@ -19,7 +19,7 @@ With OpenVINO Model Server 2023.1 release C-API is no longer in preview state an ## API Description -Server functionalities are encapsulated in shared library built from OpenVINO Model Server source.
To include OpenVINO Model Server you need to link this library with your application and use C API defined in [header file](https://github.com/openvinotoolkit/model_server/blob/main/src/ovms.h). +Server functionalities are encapsulated in shared library built from OpenVINO Model Server source. To include OpenVINO Model Server you need to link this library with your application and use C API defined in [header file](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/src/ovms.h). Calling a method to start the model serving in your application initiates the OpenVINO Model Server as a separate thread. Then you can schedule inference both directly from app using C API and gRPC/HTTP endpoints. diff --git a/docs/model_server_grpc_api_kfs.md b/docs/model_server_grpc_api_kfs.md index 2ed4b0390e..687eb66559 100644 --- a/docs/model_server_grpc_api_kfs.md +++ b/docs/model_server_grpc_api_kfs.md @@ -13,7 +13,7 @@ The API includes following endpoints: * [Inference API](#inference-api) * [Streaming Inference API](#streaming-inference-api-extension) -> **NOTE**: Examples of using each of above endpoints can be found in [KServe samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples/README.md). +> **NOTE**: Examples of using each of above endpoints can be found in [KServe samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples/README.md). ## Server Live API @@ -57,7 +57,7 @@ Check documentation for more [details](./streaming_endpoints.md). ## See Also -- [Example client code](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples/README.md) shows how to use GRPC API and REST API. +- [Example client code](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples/README.md) shows how to use GRPC API and REST API. - [KServe API](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2) - [gRPC](https://grpc.io/) diff --git a/docs/model_server_grpc_api_tfs.md b/docs/model_server_grpc_api_tfs.md index 73c0a91ffc..4936b1879c 100644 --- a/docs/model_server_grpc_api_tfs.md +++ b/docs/model_server_grpc_api_tfs.md @@ -18,7 +18,7 @@ Gets information about the status of served models including Model Version [Get Model Status proto](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/get_model_status.proto) defines three message definitions used while calling Status endpoint: *GetModelStatusRequest*, *ModelVersionStatus*, *GetModelStatusResponse* that are used to report all exposed versions including their state in their lifecycle. - Read more about [Get Model Status API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#model-status-api). + Read more about [Get Model Status API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#model-status-api). ## Model Metadata API @@ -27,7 +27,7 @@ Gets information about the served models. A function called GetModelMetadata acc [Get Model Metadata proto](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/get_model_metadata.proto) has three message definitions: *SignatureDefMap*, *GetModelMetadataRequest*, *GetModelMetadataResponse*. 
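In practice these proto messages are usually wrapped by a client library. A minimal sketch with the `ovmsclient` package, assuming a model named `resnet` served on port 9000 (both are placeholders):

```python
from ovmsclient import make_grpc_client

client = make_grpc_client("localhost:9000")

# ovmsclient builds the GetModelStatus/GetModelMetadata requests internally.
print(client.get_model_status(model_name="resnet"))
print(client.get_model_metadata(model_name="resnet"))
```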
-Read more about [Get Model Metadata API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#model-metadata-api). +Read more about [Get Model Metadata API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#model-metadata-api). ## Predict API @@ -40,13 +40,13 @@ Endpoint for running an inference with loaded models or [DAGs](./dag_scheduler.m * *PredictResponse* includes a map of outputs serialized by [TensorProto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) and information about the used model spec. -Read more about [Predict API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#predict-api) +Read more about [Predict API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#predict-api) Also, using `string_val` field it is possible to send binary encoded images that would be preprocessed by OVMS using opencv and converted to OpenVINO-friendly format. For more information check [how binary data is handled in OpenVINO Model Server](./binary_input_tfs.md) ## See Also -- [Example client code](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md) shows how to use GRPC API and REST API. +- [Example client code](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md) shows how to use GRPC API and REST API. - [TensorFlow Serving](https://github.com/tensorflow/serving) - [gRPC](https://grpc.io/) diff --git a/docs/model_server_rest_api_chat.md b/docs/model_server_rest_api_chat.md index 2497f1d731..7444b90d95 100644 --- a/docs/model_server_rest_api_chat.md +++ b/docs/model_server_rest_api_chat.md @@ -146,6 +146,6 @@ curl http://localhost/v3/chat/completions \ [Code snippets](./clients_openai.md) -[LLM calculator](./llm_calculator.md) +[LLM calculator](./llm/reference.md) [Developer guide for writing custom calculators with REST API extension](./mediapipe.md) diff --git a/docs/model_server_rest_api_completions.md b/docs/model_server_rest_api_completions.md index 63d257e344..b0c45f6919 100644 --- a/docs/model_server_rest_api_completions.md +++ b/docs/model_server_rest_api_completions.md @@ -124,6 +124,6 @@ curl http://localhost/v3/completions \ [Code snippets](./clients_openai.md) -[LLM calculator](./llm_calculator.md) +[LLM calculator](./llm/reference.md) [Developer guide for writing custom calculators with REST API extension](./mediapipe.md) diff --git a/docs/model_server_rest_api_kfs.md b/docs/model_server_rest_api_kfs.md index 06a5e87aaa..5df514b122 100644 --- a/docs/model_server_rest_api_kfs.md +++ b/docs/model_server_rest_api_kfs.md @@ -36,7 +36,7 @@ Date: Tue, 09 Aug 2022 09:20:24 GMT Content-Length: 2 ``` -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for getting server liveness with KServe API on HTTP Server Live endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for getting server liveness with KServe API on HTTP Server Live endpoint. 
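The same check can be scripted with the `tritonclient` package; a minimal sketch, assuming the REST endpoint is exposed on port 8000:

```python
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# Returns True when GET /v2/health/live responds with 200 OK.
print("Server live:", client.is_server_live())
```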
## Server Ready API **Description** @@ -63,7 +63,7 @@ Date: Tue, 09 Aug 2022 09:22:14 GMT Content-Length: 2 ``` -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for getting server readiness with KServe API on HTTP Server Ready endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for getting server readiness with KServe API on HTTP Server Ready endpoint. ## Server Metadata API **Description** @@ -103,7 +103,7 @@ $ curl http://localhost:5000/v2 For detailed description of the response contents see [KServe API docs](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#server-metadata). -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for getting server metadata with KServe API on HTTP Server Metadata endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for getting server metadata with KServe API on HTTP Server Metadata endpoint. ## Model Ready API **Description** @@ -130,7 +130,7 @@ Date: Tue, 09 Aug 2022 09:25:31 GMT Content-Length: 2 ``` -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for getting model readiness with KServe API on HTTP Model Ready endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for getting model readiness with KServe API on HTTP Model Ready endpoint. @@ -185,7 +185,7 @@ $ curl http://localhost:8000/v2/models/resnet For detailed description of the response contents see [KServe API docs](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#model-metadata). -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for running getting model metadata with KServe API on HTTP Model Metadata endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for getting model metadata with KServe API on HTTP Model Metadata endpoint. ## Inference API **Description** @@ -352,4 +352,4 @@ For detailed description of request and response contents see [KServe API docs]( > Note: Using //.. at the end of request URI results in truncated path, which might result in different response than expected. -See also [code samples](https://github.com/openvinotoolkit/model_server/tree/main/client/python/kserve-api/samples) for running inference with KServe API on HTTP Inference endpoint. +See also [code samples](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/client/python/kserve-api/samples) for running inference with KServe API on HTTP Inference endpoint.
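To complement the raw HTTP description, here is a minimal inference sketch with the `tritonclient` package; the model name `resnet`, input name `0`, output name `1463` and port 8000 are placeholders for your deployment:

```python
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

data = np.zeros((1, 3, 224, 224), dtype=np.float32)
infer_input = httpclient.InferInput("0", list(data.shape), "FP32")
infer_input.set_data_from_numpy(data, binary_data=True)

# Issues POST /v2/models/resnet/infer with the tensor packed as binary data.
result = client.infer("resnet", inputs=[infer_input])
print(result.as_numpy("1463").shape)
```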
diff --git a/docs/model_server_rest_api_tfs.md b/docs/model_server_rest_api_tfs.md index 1666a5daa8..8605f5ddf1 100644 --- a/docs/model_server_rest_api_tfs.md +++ b/docs/model_server_rest_api_tfs.md @@ -65,7 +65,7 @@ $ curl http://localhost:8001/v1/models/person-detection/versions/1 ] } ``` -Read more about [Get Model Status API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#model-status-api-1) +Read more about [Get Model Status API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#model-status-api-1) ## Model Metadata API **Description** @@ -148,7 +148,7 @@ $ curl http://localhost:8001/v1/models/person-detection/versions/1/metadata } ``` -Read more about [Get Model Metadata API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#model-metadata-api-1) +Read more about [Get Model Metadata API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#model-metadata-api-1) ## Predict API **Description** @@ -212,7 +212,7 @@ On the server side, the binary encoded data is loaded using OpenCV which then co Check [how binary data is handled in OpenVINO Model Server](./binary_input.md) for more informations. -Read more about [Predict API usage](https://github.com/openvinotoolkit/model_server/blob/main/client/python/tensorflow-serving-api/samples/README.md#predict-api-1) +Read more about [Predict API usage](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/client/python/tensorflow-serving-api/samples/README.md#predict-api-1) ## Config Reload API **Description** diff --git a/docs/ovms_quickstart.md b/docs/ovms_quickstart.md index a7dc336c2a..ca05a2da1e 100644 --- a/docs/ovms_quickstart.md +++ b/docs/ovms_quickstart.md @@ -79,8 +79,8 @@ During this step, the `model` folder is mounted to the Docker container. This f Client scripts are available for quick access to the Model Server. Run an example command to download all required components: ```bash -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/object_detection/python/object_detection.py -wget https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/object_detection/python/requirements.txt +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/object_detection/python/object_detection.py +wget https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/object_detection/python/requirements.txt wget https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/data/dataset_classes/coco_91cl.txt ``` diff --git a/docs/python_support/reference.md b/docs/python_support/reference.md index 2c902f3e72..035ad8f245 100644 --- a/docs/python_support/reference.md +++ b/docs/python_support/reference.md @@ -6,11 +6,11 @@ Starting with version 2023.3, OpenVINO Model Server supports execution of custom Python code. Such code can execute simple pre- or post-processing as well as complex tasks like image or text generation. - Python execution is enabled via [MediaPipe](../mediapipe.md) by the built-in [`PythonExecutorCalculator`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#pythonexecutorcalculator) that allows creating graph nodes to execute Python code. 
Python nodes can be used as standalone servables (single node graphs) or be part of larger MediaPipe graphs. + Python execution is enabled via [MediaPipe](../mediapipe.md) by the built-in [`PythonExecutorCalculator`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#pythonexecutorcalculator) that allows creating graph nodes to execute Python code. Python nodes can be used as standalone servables (single node graphs) or be part of larger MediaPipe graphs. Check out the [quickstart guide](quickstart.md) for a simple example that shows how to use this feature. - Check out [Generative AI demos](https://docs.openvino.ai/nightly/ovms_docs_demos.html#check-out-new-generative-ai-demos) for real life use cases. + Check out [Generative AI demos](https://docs.openvino.ai/2024/ovms_docs_demos.html#check-out-new-generative-ai-demos) for real life use cases. ## Building Docker Image @@ -27,7 +27,7 @@ RUN pip3 install numpy ENTRYPOINT [ `/ovms/bin/ovms` ] ``` -You can also modify `requirements.txt` from our [python demos](https://github.com/openvinotoolkit/model_server/tree/main/demos/python_demos) and from repository top level directory run `make python_image` +You can also modify `requirements.txt` from our [python demos](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/python_demos) and from repository top level directory run `make python_image` ## `OvmsPythonModel` class @@ -109,7 +109,7 @@ For gRPC streaming, there can be multiple graph instances existing at the same t #### Parameters and return value `initialize` is called with `kwargs` parameter which is a dictionary. -`kwargs` contain information from [node configuration](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#pythonexecutorcalculator). Considering a sample: +`kwargs` contain information from [node configuration](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#pythonexecutorcalculator). Considering a sample: ```pbtxt node { @@ -158,7 +158,7 @@ def execute(self, inputs): return outputs ``` -More information along with the configuration aspect described can be found in [execution modes](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#execution-modes) section. +More information along with the configuration aspect described can be found in [execution modes](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#execution-modes) section. #### Generative @@ -172,7 +172,7 @@ def execute(self, inputs): yield outputs ``` -More information along with the configuration aspect described can be found in [execution modes](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#execution-modes) section. +More information along with the configuration aspect described can be found in [execution modes](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#execution-modes) section. #### Parameters and return value @@ -191,7 +191,7 @@ Note that this method returns outputs as a list, but since each output is a sepa - For unary endpoints model server gathers all outputs from the graph and sends them all together in a single response -- For streaming endpoints model server packs output and sends it in the response as soon as it arrives. It means that if `execute` returns a list of `X` outputs, the client will receive those outputs in `X` separate responses. 
The outputs can then be [gathered using timestamp](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#outputs-synchronization-in-grpc-streaming) that can be found in received responses. +- For streaming endpoints model server packs output and sends it in the response as soon as it arrives. It means that if `execute` returns a list of `X` outputs, the client will receive those outputs in `X` separate responses. The outputs can then be [gathered using timestamp](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#outputs-synchronization-in-grpc-streaming) that can be found in received responses. #### Error handling @@ -256,7 +256,7 @@ This `Tensor` class is a C++ class with a Python binding that implements Python *Note*: `datatype` attribute is not part of buffer protocol implementation. Buffer protocol uses `format` value that uses [struct format characters](https://docs.python.org/3/library/struct.html#format-characters). It can be read from `data` memoryview. -There's a mapping between those two - see [datatype considerations](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#datatype-considerations). +There's a mapping between those two - see [datatype considerations](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#datatype-considerations). As `pyovms.Tensor` implements buffer protocol it can be converted to another types that also implement buffer protocol: @@ -274,7 +274,7 @@ Inputs will be provided to the `execute` function, but outputs must be prepared `Tensor(name, data, shape=None, datatype=None)` -- `name`: a string that associates Tensor data with specific name. This name is also used by `PythonExecutorCalculator` to push data to the correct output stream in the node. More about it in [node configuration section](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#input-and-output-streams-in-python-code). +- `name`: a string that associates Tensor data with specific name. This name is also used by `PythonExecutorCalculator` to push data to the correct output stream in the node. More about it in [node configuration section](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#input-and-output-streams-in-python-code). - `data`: an object that implements Python [Buffer Protocol](https://docs.python.org/3/c-api/buffer.html#buffer-protocol). This could be an instance of some built-in type like `bytes` or types from external modules like `numpy.ndarray`. @@ -282,7 +282,7 @@ Inputs will be provided to the `execute` function, but outputs must be prepared - `datatype` (*optional*): a string defining the type of the data. This value is directly assigned to `datatype` attribute of the `Tensor`. By default, `datatype` attribute is inherited from the `data` object. Providing `datatype` to the constructor will override inherited value, so use it only if you know what you are doing. -**Note**: `shape` and `datatype` arguments do not modify internal structure of the data - there are no reshapes and type conversions. They only override `Tensor.shape` and `Tensor.datatype` attributes, so the user can provide custom context to the next node or server response. It means they can be completely detached from the data buffer properties and it's user's reponsibility to correctly interpret these attributes while reading the `Tensor` in the next node or the server response on the client side. 
@@ -274,7 +274,7 @@ Inputs will be provided to the `execute` function, but outputs must be prepared
 
 `Tensor(name, data, shape=None, datatype=None)`
 
-- `name`: a string that associates Tensor data with specific name. This name is also used by `PythonExecutorCalculator` to push data to the correct output stream in the node. More about it in [node configuration section](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#input-and-output-streams-in-python-code).
+- `name`: a string that associates Tensor data with a specific name. This name is also used by `PythonExecutorCalculator` to push data to the correct output stream in the node. More about this in the [node configuration section](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#input-and-output-streams-in-python-code).
 
 - `data`: an object that implements Python [Buffer Protocol](https://docs.python.org/3/c-api/buffer.html#buffer-protocol). This could be an instance of some built-in type like `bytes` or types from external modules like `numpy.ndarray`.
 
@@ -282,7 +282,7 @@ Inputs will be provided to the `execute` function, but outputs must be prepared
 
 - `datatype` (*optional*): a string defining the type of the data. This value is directly assigned to `datatype` attribute of the `Tensor`. By default, `datatype` attribute is inherited from the `data` object. Providing `datatype` to the constructor will override inherited value, so use it only if you know what you are doing.
 
-**Note**: `shape` and `datatype` arguments do not modify internal structure of the data - there are no reshapes and type conversions. They only override `Tensor.shape` and `Tensor.datatype` attributes, so the user can provide custom context to the next node or server response. It means they can be completely detached from the data buffer properties and it's user's reponsibility to correctly interpret these attributes while reading the `Tensor` in the next node or the server response on the client side.
+**Note**: `shape` and `datatype` arguments do not modify the internal structure of the data - there are no reshapes or type conversions. They only override the `Tensor.shape` and `Tensor.datatype` attributes, so the user can provide custom context to the next node or server response. It means they can be completely detached from the data buffer properties, and it is the user's responsibility to correctly interpret these attributes while reading the `Tensor` in the next node or in the server response on the client side.
 
 ```python
 import numpy as np
@@ -304,7 +304,7 @@ class OvmsPythonModel:
 
 As `Tensor` gets created from another type it adapts all fields required by the buffer protocol as its own. Depending on how `Tensor` is created `shape` or `datatype` may be overridden.
-If they are not provided `Tensor` will adapt another buffer `shape` as it's own and will map it's `format` to a `datatype`. Learn more in [datatype considerations](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#datatype-considerations) section.
+If they are not provided, `Tensor` will adopt the other buffer's `shape` as its own and will map its `format` to a `datatype`. Learn more in the [datatype considerations](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#datatype-considerations) section.
 
 If the node is connected to another Python node, then Tensors pushed to the output of this node, are inputs of another node.
 
@@ -342,7 +342,7 @@ The same mapping is applied the other way around when creating `Tensor` from ano
 
 In some cases, users may work with more complex types that are not listed above and model server also allows that.
 
 #### BYTES datatype
-If `datatype` "BYTES" is specified and data is located in bytes_contents field of input(for gRPC) or in JSON body(for REST) OVMS converts it to `pyovms.Tensor` buffer according to the format where every input is preceeded by four bytes of its size.
+If `datatype` "BYTES" is specified and data is located in the bytes_contents field of an input (for gRPC) or in the JSON body (for REST), OVMS converts it to a `pyovms.Tensor` buffer according to a format where every input is preceded by four bytes of its size.
 
 For example this gRPC request:
 
    bytes_content: [<240 byte element>, <1024 byte element>, <567 byte element>]
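+
+A sketch of how such a buffer could be unpacked in Python (assuming little-endian size prefixes for illustration):
+
+```python
+import struct
+
+def unpack_bytes_buffer(buffer) -> list:
+    # Walk the buffer: read a 4-byte size, then slice out that many bytes.
+    view = memoryview(buffer)
+    elements, offset = [], 0
+    while offset < len(view):
+        (size,) = struct.unpack_from("<I", view, offset)
+        offset += 4
+        elements.append(bytes(view[offset:offset + size]))
+        offset += size
+    return elements
+```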
@@ -477,7 +477,7 @@ class OvmsPythonModel:
     ...
 ```
 
-**Note**: Node configuration and `execute` implementation should always match. For example if the node is configured to work with [incomplete inputs](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#incomplete-inputs), then accessing `Tensors` via index will not be useful.
+**Note**: Node configuration and `execute` implementation should always match. For example, if the node is configured to work with [incomplete inputs](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#incomplete-inputs), then accessing `Tensors` via index will not be useful.
 
 ### Graph input and output streams
 
@@ -614,9 +614,9 @@ Learn more about how [MediaPipe flow works in OpenVINO Model Server](../mediapip
 
-For inference, data can be send both via [gRPC API](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#grpc) and [KServe API](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#httprest)(only for unary calls). If the graph has a `OvmsPyTensor` output stream, then the data in the KServe response can be found in `raw_output_contents` field (even if data in the request has been placed in `InferTensorContents`).
+For inference, data can be sent both via the [gRPC API](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#grpc) and the [KServe API](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#httprest) (only for unary calls). If the graph has an `OvmsPyTensor` output stream, then the data in the KServe response can be found in the `raw_output_contents` field (even if data in the request has been placed in `InferTensorContents`).
 The data passed in the request is accessible in `execute` method of the node connected to graph input via `data` attribute of [`pyovms.Tensor`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#python-tensor) object.
-For data of type BYTES send in bytes_contents field of input(for gRPC) or in JSON body(for REST) OVMS converts it to `pyovms.Tensor` buffer according to the format where every input is preceeded by four bytes of its size.
+For data of type BYTES sent in the bytes_contents field of an input (for gRPC) or in the JSON body (for REST), OVMS converts it to a `pyovms.Tensor` buffer according to a format where every input is preceded by four bytes of its size.
 
-Inputs and outputs also define `shape` and `datatype` parameters. Those values are also accessible in `pyovms.Tensor`. For outputs, `datatype` and `shape` are by default read from the underlying buffer, but it is possible to overwrite them (see [`pyovms.Tensor constructor`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#creating-output-tensors). If you specify `datatype` as `BYTES` in your requests, make sure to review [datatype considerations](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#datatype-considerations), since this type is treated differently than the others.
+Inputs and outputs also define `shape` and `datatype` parameters. Those values are also accessible in `pyovms.Tensor`. For outputs, `datatype` and `shape` are by default read from the underlying buffer, but it is possible to overwrite them (see the [`pyovms.Tensor` constructor](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#creating-output-tensors)). If you specify `datatype` as `BYTES` in your requests, make sure to review [datatype considerations](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#datatype-considerations), since this type is treated differently than the others.
 
-Let's see it on an example:
+Let's look at an example:
 
@@ -682,11 +682,11 @@ class OvmsPythonModel:
 
 Mediapipe graph works with packets and every packet has its timestamp. The timestamps of packets on all streams (both input and output) must be ascending.
 
-When requesting inference, user can decide to use automatic timestamping, or send timestamps themself along with the request as `OVMS_MP_TIMESTAMP` parameter. Learn more about [timestamping](https://docs.openvino.ai/nightly/ovms_docs_streaming_endpoints.html#timestamping)
+When requesting inference, the user can decide to use automatic timestamping, or send timestamps themselves along with the request as the `OVMS_MP_TIMESTAMP` parameter. Learn more about [timestamping](https://docs.openvino.ai/2024/ovms_docs_streaming_endpoints.html#timestamping).
 
 When it comes to Python node `PythonExecutorCalculator`:
-- for [regular execution mode](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#regular-mode) simply propagates timestamp i.e. uses input timestamp as output timestamp.
-- for [generative execution mode](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#generative-mode) it saves timestamp of the input and sends first set of outputs downstream with this timestamp. Then timestamp gets incremented with each generation, so next sets of output packages have ascending timestamp.
+- for [regular execution mode](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#regular-mode) it simply propagates the timestamp, i.e. uses the input timestamp as the output timestamp.
+- for [generative execution mode](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#generative-mode) it saves the timestamp of the input and sends the first set of outputs downstream with this timestamp. Then the timestamp gets incremented with each generation, so subsequent sets of output packets have ascending timestamps.
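+
+On the client side, the timestamp can be read back from the response parameters. A sketch with the KServe gRPC client from `tritonclient` (the callback shape follows its streaming API; error handling is trimmed):
+
+```python
+import tritonclient.grpc as grpcclient
+
+def callback(result, error):
+    # Each streamed response carries the packet timestamp, which lets the
+    # client group outputs that were produced from the same input set.
+    if error is None:
+        response = result.get_response()
+        timestamp = response.parameters["OVMS_MP_TIMESTAMP"].int64_param
+        print(timestamp, [output.name for output in response.outputs])
+```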
 
 **Multiple generation cycles on a single graph instance**
 
@@ -719,7 +719,7 @@ Depending on which mode is used, both the Python code and graph configuration mu
 
 #### Regular mode
 
-When using regular mode, the `execute` method in [`OvmsPythonModel`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#ovmspythonmodel-class) class must `return` value.
+When using regular mode, the `execute` method in the [`OvmsPythonModel`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#ovmspythonmodel-class) class must `return` a value.
 
 ```python
 from pyovms import Tensor
@@ -730,7 +730,7 @@ from pyovms import Tensor
     return [my_output]
 ```
 
-When `execute` returns, the [`PythonExecutorCalculator`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#pythonexecutorcalculator) grabs the outputs and pushes them down the graph. Node `Process` method is called once per inputs set. Such implementation can be paired with basic graph setting, like:
+When `execute` returns, the [`PythonExecutorCalculator`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#pythonexecutorcalculator) grabs the outputs and pushes them down the graph. The node's `Process` method is called once per input set. Such an implementation can be paired with a basic graph configuration, like:
 
 ```pbtxt
 node {
@@ -749,7 +749,7 @@ node {
 
 #### Generative mode
 
-When using generative mode, the `execute` method in [`OvmsPythonModel`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#ovmspythonmodel-class) class must `yield` value.
+When using generative mode, the `execute` method in the [`OvmsPythonModel`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#ovmspythonmodel-class) class must `yield` a value.
 
 ```python
 from pyovms import Tensor
@@ -761,7 +761,7 @@ from pyovms import Tensor
     yield [my_output]
 ```
 
-When `execute` yields, the [`PythonExecutorCalculator`](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#pythonexecutorcalculator) saves the generator. Then it repeatedly calls it until it reaches the end of generated sequence. Node `Process` method is called multiple times per single inputs set. To trigger such behavior a specific graph configuration is needed. See below:
+When `execute` yields, the [`PythonExecutorCalculator`](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#pythonexecutorcalculator) saves the generator. Then it repeatedly calls it until it reaches the end of the generated sequence. The node's `Process` method is called multiple times per single input set. To trigger such behavior, a specific graph configuration is needed. See below:
 
 ```pbtxt
 node {
@@ -843,14 +843,14 @@ Apart from basic configuration present also in regular mode, this graph contains
 
-It's recommended not to reuse the same graph instance when the cycle is finished. Instead, if you want to generate for new data, create new gRPC stream.
+It's recommended not to reuse the same graph instance when the cycle is finished. Instead, if you want to generate for new data, create a new gRPC stream.
 
-For working configurations and code samples see the [demos](https://docs.openvino.ai/nightly/ovms_docs_demos.html#check-out-new-generative-ai-demos).
+For working configurations and code samples, see the [demos](https://docs.openvino.ai/2024/ovms_docs_demos.html#check-out-new-generative-ai-demos).
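+
+For illustration, a minimal generative `execute` paired with such a configuration could look like this (the `OUTPUT` stream name and the whitespace tokenization are assumptions of this sketch):
+
+```python
+from pyovms import Tensor
+
+class OvmsPythonModel:
+    def execute(self, inputs):
+        # Yield one word at a time; the calculator sends every yielded set
+        # downstream with an ascending timestamp until the generator ends.
+        text = bytes(inputs[0]).decode()
+        for word in text.split():
+            yield [Tensor("OUTPUT", word.encode())]
+```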
 
 ### Incomplete inputs
 
-There are usecases when firing `Process` with only a subset of inputs defined in node configuration is desired. By default, node waits for all inputs with the same timestamp and launches `Process` once they're all available. Such behavior is implemented by the `DefaultInputStreamHandler` which is used by default.
+There are use cases when firing `Process` with only a subset of the inputs defined in the node configuration is desired. By default, the node waits for all inputs with the same timestamp and launches `Process` once they're all available. This behavior is implemented by the `DefaultInputStreamHandler`.
 To configure the node to launch `Process` with only a subset of inputs you should use a different input stream handler for different [input policy](https://developers.google.com/mediapipe/framework/framework_concepts/synchronization#input_policies).
 
-Such configuration is used in [generative execution mode](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#generative-mode), but let's see another example:
+Such a configuration is used in [generative execution mode](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#generative-mode), but let's see another example:
 
 ```pbtxt
 node {
@@ -938,18 +938,18 @@ class OvmsPythonModel:
 
 In such case, the client could implement different actions depending on which output it receives on the stream.
 
-Another example of such configuration is signaling that generation is finished when running in [generative mode](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#generative-mode). This solution is used in [text generation demo](https://github.com/openvinotoolkit/model_server/tree/main/demos/python_demos/llm_text_generation).
+Another example of such a configuration is signaling that generation is finished when running in [generative mode](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#generative-mode). This solution is used in the [text generation demo](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/python_demos/llm_text_generation).
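+
+When a node runs with incomplete inputs, `execute` should check which inputs actually arrived, for example (the `first`/`second` input names and the `output` name are illustrative):
+
+```python
+from pyovms import Tensor
+
+class OvmsPythonModel:
+    def execute(self, inputs):
+        # Only a subset of streams delivers data on each call,
+        # so look tensors up by name instead of by index.
+        received = {tensor.name: tensor for tensor in inputs}
+        if "first" in received:
+            return [Tensor("output", bytes(received["first"]))]
+        return [Tensor("output", bytes(received["second"]))]
+```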
 
 ### Calculator type conversions
 
-Python nodes work with a dedicated [Python Tensor](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#python-tensor) objects that can be used both on C++ and Python side. The downside of that approach is that usually other calculators cannot read and create such objects. It means that Python nodes cannot be directly connected to any other, non-Python nodes.
+Python nodes work with dedicated [Python Tensor](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#python-tensor) objects that can be used on both the C++ and the Python side. The downside of that approach is that other calculators usually cannot read or create such objects. It means that Python nodes cannot be directly connected to other, non-Python nodes.
 
-That's why converter calculators exists. They work as adapters between nodes and implement necessary conversions needed to create a connection between calculators that work on two different types of packets.
+That's why converter calculators exist. They work as adapters between nodes and implement the conversions needed to create a connection between calculators that work on two different types of packets.
 
 #### PyTensorOvTensorConverterCalculator
 
-OpenVINO Model Server comes with a built-in `PyTensorOvTensorConverterCalculator` that provides conversion between [Python Tensor](https://docs.openvino.ai/nightly/ovms_docs_python_support_reference.html#python-tensor) and [OV Tensor](https://docs.openvino.ai/2024/api/c_cpp_api/classov_1_1_tensor.html).
+OpenVINO Model Server comes with a built-in `PyTensorOvTensorConverterCalculator` that provides conversion between [Python Tensor](https://docs.openvino.ai/2024/ovms_docs_python_support_reference.html#python-tensor) and [OV Tensor](https://docs.openvino.ai/2024/api/c_cpp_api/classov_1_1_tensor.html).
 
 Currently `PyTensorOvTensorConverterCalculator` works with only one input and one output.
 - The stream that expects Python Tensor **must** have tag `OVMS_PY_TENSOR`
 
@@ -1019,4 +1019,4 @@ node {
 }
 ```
 
-See a [CLIP demo](https://github.com/openvinotoolkit/model_server/tree/main/demos/python_demos/clip_image_classification) for a complete example of a graph that uses Python nodes, OV Inference nodes and converter nodes.
+See the [CLIP demo](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/python_demos/clip_image_classification) for a complete example of a graph that uses Python nodes, OV Inference nodes and converter nodes.
diff --git a/docs/writing_app.md b/docs/writing_app.md
index 826bf60454..b1d4f9beea 100644
--- a/docs/writing_app.md
+++ b/docs/writing_app.md
@@ -21,7 +21,7 @@ OpenAI API `chat/completion` endpoint supports REST API calls with and without s
 - [TensorFlow Serving REST API](./model_server_rest_api_tfs.md)
 - [KServe REST API](./model_server_rest_api_kfs.md)
 - [OpenAI chat completions API](./model_server_rest_api_chat.md)
-- [OpenAI completions API](./model_server_rest_api_completion.md)
+- [OpenAI completions API](./model_server_rest_api_completions.md)
 
 In this section you can find short code samples to interact with OpenVINO Model Server endpoints via:
 - [TensorFlow Serving API](./clients_tfs.md)
diff --git a/extras/nginx-mtls-auth/get_model.sh b/extras/nginx-mtls-auth/get_model.sh
index b68049c6b3..7520a4222d 100755
--- a/extras/nginx-mtls-auth/get_model.sh
+++ b/extras/nginx-mtls-auth/get_model.sh
@@ -17,7 +17,7 @@ curl --create-dirs https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/face-detection-retail-0004/FP32/face-detection-retail-0004.xml https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.1/models_bin/2/face-detection-retail-0004/FP32/face-detection-retail-0004.bin -o model/face-detection-retail-0004.xml -o model/face-detection-retail-0004.bin
-curl --fail --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/common/static/images/people/people1.jpeg -o images/people1.jpeg
+curl --fail --create-dirs https://raw.githubusercontent.com/openvinotoolkit/model_server/releases/2024/2/demos/common/static/images/people/people1.jpeg -o images/people1.jpeg
 
 chmod 666 -vR ./images/ ./model/
 chmod +x ./images/ ./model/
diff --git a/src/custom_nodes/east_ocr/README.md b/src/custom_nodes/east_ocr/README.md
index 1f3634f846..8a3091858c 100644
--- a/src/custom_nodes/east_ocr/README.md
+++ b/src/custom_nodes/east_ocr/README.md
@@ -7,7 +7,7 @@ DAG pipeline.
 
-Additionally to the detected text boxes, in the two additional outputs are returned their coordinates with information about geometry and confidence levels for the filtered list of detections.
+In addition to the detected text boxes, two additional outputs return their coordinates, along with geometry information and confidence levels for the filtered list of detections.
 
-**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/main/demos/optical_character_recognition/python/config.json) is available in [optical character recognition demo](https://github.com/openvinotoolkit/model_server/blob/main/demos/optical_character_recognition/python/).
+**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/optical_character_recognition/python/config.json) is available in [optical character recognition demo](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/optical_character_recognition/python/).
 
 # Building custom node library
 
diff --git a/src/custom_nodes/face_blur/README.md b/src/custom_nodes/face_blur/README.md
index d5b355faf0..3a89d8a2fd 100644
--- a/src/custom_nodes/face_blur/README.md
+++ b/src/custom_nodes/face_blur/README.md
@@ -18,7 +18,7 @@ All [OpenVINO Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/tree/
 - vehicle-license-plate-detection
 - pedestrian-and-vehicle-detector
 
-**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_blur/python/config.json) is available in [face_blur demo](https://github.com/openvinotoolkit/model_server/blob/main/demos/face_blur/python/).
+**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_blur/python/config.json) is available in [face_blur demo](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/face_blur/python/).
 
 # Building custom node library
 
@@ -48,7 +48,7 @@ make BASE_OS=redhat NODES=face_blur
 | image | Returns blurred image in place of detected boxes. Boxes are filtered based on confidence_threshold param. Resolution is defined by the node parameters. | `N,C,H,W` | FP32 |
 
 # Custom node parameters
-Parameters can be defined in pipeline definition in OVMS configuration file. [Read more](https://github.com/openvinotoolkit/model_server/blob/main/docs/custom_node_development.md) about node parameters.
+Parameters can be defined in the pipeline definition in the OVMS configuration file. [Read more](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/docs/custom_node_development.md) about node parameters.
 | Parameter | Description | Default | Required |
 | ------------- | ------------- | ------------- | ------------ |
 | original_image_width | Required input image width | | ✓ |
diff --git a/src/custom_nodes/horizontal_ocr/README.md b/src/custom_nodes/horizontal_ocr/README.md
index d4c9eeb5b6..8a527b0b3b 100644
--- a/src/custom_nodes/horizontal_ocr/README.md
+++ b/src/custom_nodes/horizontal_ocr/README.md
@@ -8,7 +8,7 @@ Additionally to the detected text boxes, in the two additional outputs are retur
 
 This custom node can be used to process video frames via [camera example](../../../demos/horizontal_text_detection/python/README.md).
 
-**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/main/demos/horizontal_text_detection/python/config.json) is available in [demo with camera](https://github.com/openvinotoolkit/model_server/blob/main/demos/horizontal_text_detection/python/).
+**NOTE** Exemplary [configuration file](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/horizontal_text_detection/python/config.json) is available in [demo with camera](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/horizontal_text_detection/python/).
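+
+For a quick smoke test of a pipeline built on this node, a KServe gRPC client sketch could look as follows (the pipeline name `ocr_pipeline`, the `image` input, its shape, and the `texts` output are hypothetical; take the real values from the demo configuration):
+
+```python
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+client = grpcclient.InferenceServerClient("localhost:9000")
+frame = np.zeros((1, 3, 704, 704), dtype=np.float32)   # placeholder N,C,H,W frame
+infer_input = grpcclient.InferInput("image", list(frame.shape), "FP32")
+infer_input.set_data_from_numpy(frame)
+result = client.infer("ocr_pipeline", [infer_input])
+texts = result.as_numpy("texts")                        # hypothetical output name
+```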
 
 # Building custom node library
 
diff --git a/src/custom_nodes/image_transformation/README.md b/src/custom_nodes/image_transformation/README.md
index 351362ab44..7401484eff 100644
--- a/src/custom_nodes/image_transformation/README.md
+++ b/src/custom_nodes/image_transformation/README.md
@@ -9,7 +9,7 @@ This custom node takes image with dynamic shape (color, width, height) as an inp
 
-Important to note that this node uses OpenCV for processing so for good performance results prefers NHWC layout. In other cases conversion applies which reduces performance of this node.
+Note that this node uses OpenCV for processing, so for good performance it prefers the NHWC layout. In other cases a conversion applies, which reduces the performance of this node.
 
-**NOTE** Exemplary configuration files are available in [onnx model with server preprocessing demo](https://github.com/openvinotoolkit/model_server/tree/main/demos/using_onnx_model/python) and [config with single node](example_config.json).
+**NOTE** Exemplary configuration files are available in [onnx model with server preprocessing demo](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/demos/using_onnx_model/python) and [config with single node](example_config.json).
 
 # Building custom node library
 
diff --git a/src/custom_nodes/model_zoo_intel_object_detection/README.md b/src/custom_nodes/model_zoo_intel_object_detection/README.md
index ff31f2d344..2edd138697 100644
--- a/src/custom_nodes/model_zoo_intel_object_detection/README.md
+++ b/src/custom_nodes/model_zoo_intel_object_detection/README.md
@@ -25,7 +25,7 @@ All [OpenVINO Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/tree/
 Public [OpenVINO Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/public) object detection models with output tensor shape: `[1, 1, 100, 7]`:
 - ssdlite_mobilenet_v2
 
-**NOTE** Exemplary configuration files are available in [vehicle analysis pipeline demo](https://github.com/openvinotoolkit/model_server/blob/main/demos/horizontal_text_detection/python/config.json) and [multiple faces analysis demo](https://github.com/openvinotoolkit/model_server/blob/main/demos/multi_faces_analysis_pipeline/python/config.json).
+**NOTE** Exemplary configuration files are available in [vehicle analysis pipeline demo](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/horizontal_text_detection/python/config.json) and [multiple faces analysis demo](https://github.com/openvinotoolkit/model_server/blob/releases/2024/2/demos/multi_faces_analysis_pipeline/python/config.json).
 
 # Building custom node library
 
diff --git a/src/example/SampleCpuExtension/README.md b/src/example/SampleCpuExtension/README.md
index f42abd427d..065724f3d1 100644
--- a/src/example/SampleCpuExtension/README.md
+++ b/src/example/SampleCpuExtension/README.md
@@ -8,7 +8,7 @@ custom extension execution.
 
 ## Creating cpu_extension library
 
-Compile the library by running `make cpu_extension BASE_OS=ubuntu` in root directory of [Model Server repository](https://github.com/openvinotoolkit/model_server/tree/main). The implementation of this library slightly differs from the template in OpenVINO™ repository and can be found in [SampleCpuExtension directory](https://github.com/openvinotoolkit/model_server/tree/main/src/example/SampleCpuExtension).
+Compile the library by running `make cpu_extension BASE_OS=ubuntu` in the root directory of the [Model Server repository](https://github.com/openvinotoolkit/model_server/tree/main). The implementation of this library slightly differs from the template in the OpenVINO™ repository and can be found in the [SampleCpuExtension directory](https://github.com/openvinotoolkit/model_server/tree/releases/2024/2/src/example/SampleCpuExtension).
 
-Shared library will be generated in the `lib` folder. Such library can be used to run Model Server, using `--cpu_extension` argument.
+The shared library will be generated in the `lib` folder. Such a library can be used to run Model Server with the `--cpu_extension` argument.