diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
index c94ebc0ce58..196de007c6c 100644
--- a/.ci/spellcheck/.pyspelling.wordlist.txt
+++ b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -182,6 +182,7 @@ DepthAnything
 detections
 detokenization
 detokenizer
+detokenizers
 Dettmers
 dev
 detectron
diff --git a/notebooks/openvino-tokenizers/openvino-tokenizers.ipynb b/notebooks/openvino-tokenizers/openvino-tokenizers.ipynb
index 75806a1837f..19cf872acfd 100644
--- a/notebooks/openvino-tokenizers/openvino-tokenizers.ipynb
+++ b/notebooks/openvino-tokenizers/openvino-tokenizers.ipynb
@@ -18,6 +18,7 @@
     " - [Convert Tokenizer from HuggingFace Hub with CLI Tool](#Convert-Tokenizer-from_HuggingFace-Hub-with-CLI-Tool)\n",
     " - [Convert Tokenizer from HuggingFace Hub with Python API](#Convert-Tokenizer-from-HuggingFace-Hub-with-Python-API)\n",
     "- [Text Generation Pipeline with OpenVINO Tokenizers](#Text-Generation-Pipeline-with-OpenVINO-Tokenizers)\n",
+    "- [Text Generation Pipeline with OpenVINO GenAI and OpenVINO Tokenizers](#Text-Generation-Pipeline-with-OpenVINO-GenAI-and-OpenVINO-Tokenizers)\n",
     "- [Merge Tokenizer into a Model](#Merge-Tokenizer-into-a-Model)\n",
     "- [Conclusion](#Conclusion)\n",
     "- [Links](#Links)\n",
@@ -65,13 +66,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "0c698c94-b852-4b06-b699-7d417fb55e10",
    "metadata": {},
    "outputs": [],
    "source": [
     "%pip install -Uq pip\n",
-    "%pip install --pre -Uq openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly\n",
+    "%pip install -q -U \"openvino>=2024.3.0\" openvino-tokenizers[transformers] openvino-genai\n",
     "%pip install \"numpy<2.0.0\" \"torch>=2.1\" --extra-index-url https://download.pytorch.org/whl/cpu"
    ]
   },
@@ -155,7 +156,7 @@
       " ]>,\n",
       " \n",
+      " \n",
       " ]\n",
       " outputs[\n",
       " \n",
@@ -182,7 +183,9 @@
    "id": "cd211b9f-37a9-4ae7-bc3e-4619643c08b8",
    "metadata": {},
    "source": [
-    "That way you get OpenVINO model objects. Use `save_model` function from OpenVINO to reuse converted tokenizers later:"
+    "That way you get OpenVINO model objects. Use the `save_model` function from OpenVINO to reuse converted tokenizers later:\n",
+    "\n",
+    "> ⚠️ Importing `openvino_tokenizers` adds all tokenizer-related operations to OpenVINO, after which you can work with saved tokenizers and detokenizers."
    ]
   },
   {
@@ -192,11 +195,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from openvino import save_model\n",
+    "import openvino as ov\n",
+    "\n",
+    "# This import is needed to add all tokenizer-related operations to OpenVINO\n",
+    "import openvino_tokenizers  # noqa: F401\n",
     "\n",
     "\n",
-    "save_model(ov_tokenizer, tokenizer_dir / \"openvino_tokenizer.xml\")\n",
-    "save_model(ov_detokenizer, tokenizer_dir / \"openvino_detokenizer.xml\")"
+    "ov.save_model(ov_tokenizer, tokenizer_dir / \"openvino_tokenizer.xml\")\n",
+    "ov.save_model(ov_detokenizer, tokenizer_dir / \"openvino_detokenizer.xml\")"
    ]
   },
   {
@@ -219,15 +225,12 @@
      "text": [
       "Token ids: [[ 1 4321]\n",
       " [ 1 6031]]\n",
-      "Detokenized text: [' Test' ' strings']\n"
+      "Detokenized text: ['Test' 'strings']\n"
      ]
     }
    ],
    "source": [
-    "from openvino import compile_model\n",
-    "\n",
-    "\n",
-    "tokenizer, detokenizer = compile_model(ov_tokenizer), compile_model(ov_detokenizer)\n",
+    "tokenizer, detokenizer = ov.compile_model(ov_tokenizer), ov.compile_model(ov_detokenizer)\n",
     "test_strings = [\"Test\", \"strings\"]\n",
     "\n",
     "token_ids = tokenizer(test_strings)[\"input_ids\"]\n",
@@ -255,23 +258,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Token ids: [[1, 4321], [1, 6031]]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-04-02 18:45:50.238827: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-      "2024-04-02 18:45:50.275055: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
-      "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-04-02 18:45:50.909410: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "Token ids: [[1, 4321], [1, 6031]]\n",
       "Detokenized text: [' Test', ' strings']\n"
      ]
     }
@@ -307,11 +294,11 @@
     "model_dir = Path(Path(model_id).name)\n",
     "\n",
     "if not model_dir.exists():\n",
-    "    # converting the original model\n",
+    "    # Converting the original model\n",
     "    # %pip install -U \"git+https://github.com/huggingface/optimum-intel.git\" \"nncf>=2.8.0\" onnx\n",
     "    # %optimum-cli export openvino -m $model_id --task text-generation-with-past $model_dir\n",
     "\n",
-    "    # load already converted model\n",
+    "    # Load already converted model\n",
     "    from huggingface_hub import hf_hub_download\n",
     "\n",
     "    hf_hub_download(\n",
@@ -336,15 +323,12 @@
     "import numpy as np\n",
     "from tqdm.notebook import trange\n",
     "from pathlib import Path\n",
-    "from openvino_tokenizers import add_greedy_decoding\n",
     "from openvino_tokenizers.constants import EOS_TOKEN_ID_NAME\n",
-    "from openvino import Core\n",
     "\n",
     "\n",
-    "core = Core()\n",
+    "core = ov.Core()\n",
     "\n",
-    "# add the greedy decoding subgraph on top of LLM to get the most probable token as an output\n",
-    "ov_model = add_greedy_decoding(core.read_model(model_dir / \"openvino_model.xml\"))\n",
+    "ov_model = core.read_model(model_dir / \"openvino_model.xml\")\n",
     "compiled_model = core.compile_model(ov_model)\n",
     "infer_request = compiled_model.create_infer_request()"
    ]
   },
   {
@@ -354,7 +338,7 @@
    "id": "f6f53a35-446d-4a07-9e67-5794a53b12ba",
"f6f53a35-446d-4a07-9e67-5794a53b12ba", "metadata": {}, "source": [ - "The `infer_request` object provides control over the model's state - a Key-Value cache that speeds up inference by reducing computations Multiple inference requests can be created, and each request maintains a distinct and separate state.." + "The `infer_request` object provides control over the model's state - a Key-Value cache that speeds up inference by reducing computations. Multiple inference requests can be created, and each request maintains a distinct and separate state." ] }, { @@ -366,12 +350,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "af5dd23fe83c4fed8ce6b7b0a8ed41e9", + "model_id": "9135a4ff0a7141949c4990d803862bc0", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/10 [00:00\n", "\n", - "The OpenVINO Python API allows you to avoid this by using the `share_inputs` option during inference, but it requires additional input from a developer every time the model is inferred. Combining the models and tokenizers simplifies memory management." + "The OpenVINO Python API allows you to avoid this by using the `share_inputs` option during inference, but it requires additional input from a developer every time the model is inferred. Combining the models and tokenizers simplifies memory management.\n", + "Moreover, after the combining models inputs have changed - original model has three inputs (`input_ids`, `attention_mask`, `token_type_ids`) and combined model has only one input for text input prompt." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 45, "id": "c044b56b-dae0-4fdb-97df-2aa555285f35", "metadata": {}, "outputs": [], @@ -470,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 69, "id": "c6a42d4c-1982-41b9-9612-aa19138518ac", "metadata": {}, "outputs": [ @@ -478,27 +514,42 @@ "name": "stdout", "output_type": "stream", "text": [ - "Logits: [[ 1.2007061 -1.4698029]]\n" + "Original OpenVINO model inputs:\n", + "\n", + "\n", + "\n", + "\n", + "Combined OpenVINO model inputs:\n", + "\n", + "\n", + "Logits: [[ 1.2007061 -1.469803 ]]\n" ] } ], "source": [ - "from openvino import Core, save_model\n", "from openvino_tokenizers import connect_models\n", "\n", "\n", - "core = Core()\n", + "core = ov.Core()\n", "text_input = [\"Free money!!!\"]\n", "\n", "ov_tokenizer = core.read_model(model_dir / \"openvino_tokenizer.xml\")\n", "ov_model = core.read_model(model_dir / \"openvino_model.xml\")\n", "combined_model = connect_models(ov_tokenizer, ov_model)\n", - "save_model(combined_model, model_dir / \"combined_openvino_model.xml\")\n", + "ov.save_model(combined_model, model_dir / \"combined_openvino_model.xml\")\n", + "\n", + "print(\"Original OpenVINO model inputs:\")\n", + "for input in ov_model.inputs:\n", + " print(input)\n", + "\n", + "print(\"\\nCombined OpenVINO model inputs:\")\n", + "for input in combined_model.inputs:\n", + " print(input)\n", "\n", "compiled_combined_model = core.compile_model(combined_model)\n", "openvino_output = compiled_combined_model(text_input)\n", "\n", - "print(f\"Logits: {openvino_output['logits']}\")" + "print(f\"\\nLogits: {openvino_output['logits']}\")" ] }, { @@ -538,7 +589,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.19" }, "openvino_notebooks": { "imageUrl": "https://github.com/openvinotoolkit/openvino_notebooks/assets/51917466/047f9167-a4ef-4d3d-a33b-d124541f9e2c",