Split text samples to separate folders

openvinotoolkit · Jun 5, 2024 · b460912 · b460912
1 parent da96019
commit b460912
Show file tree

Hide file tree

Showing 29 changed files with 532 additions and 98 deletions.
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -21,9 +21,21 @@ project(OpenVINOGenAI VERSION 2024.2.0.0)
 
 add_subdirectory(./thirdparty/)
 add_subdirectory(src)
-add_subdirectory(text_generation/causal_lm/cpp)
+add_subdirectory(samples/cpp/beam_search_causal_lm/)
+add_subdirectory(samples/cpp/chat_sample/)
+add_subdirectory(samples/cpp/greedy_causal_lm/)
+add_subdirectory(samples/cpp/multinomial_causal_lm/)
+add_subdirectory(samples/cpp/prompt_lookup_decoding_lm/)
+add_subdirectory(samples/cpp/speculative_decoding_lm/)
 
-install(DIRECTORY text_generation/causal_lm/cpp/ DESTINATION samples/cpp/causal_lm COMPONENT cpp_samples_genai)
+install(DIRECTORY
+        ./samples/cpp/beam_search_causal_lm
+        ./samples/cpp/chat_sample
+        ./samples/cpp/greedy_causal_lm
+        ./samples/cpp/multinomial_causal_lm
+        # Don't install prompt_lookup_decoding_lm and speculative_decoding_lm because they don't use the openvino_genai library and aren't verified yet.
+    DESTINATION samples/cpp/ COMPONENT cpp_samples_genai)
+install(FILES ./samples/cpp/requirements.txt DESTINATION samples/cpp/ COMPONENT cpp_samples_genai)
 install(FILES LICENSE DESTINATION licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
 install(FILES third-party-programs.txt DESTINATION licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
 if(MSVC AND NOT DEFINED CPACK_GENERATOR)

diff --git a/text_generation/causal_lm/cpp/README.md → samples/cpp/README.md b/text_generation/causal_lm/cpp/README.md → samples/cpp/README.md
diff --git a/samples/cpp/beam_search_causal_lm/CMakeLists.txt b/samples/cpp/beam_search_causal_lm/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+find_package(OpenVINOGenAI REQUIRED PATHS
+    "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
+    ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
+)
+add_executable(beam_search_causal_lm beam_search_causal_lm.cpp)
+target_link_libraries(beam_search_causal_lm PRIVATE openvino::genai)
+target_compile_features(beam_search_causal_lm PRIVATE cxx_std_17)  # Build the sample with at least C++17.
+install(TARGETS beam_search_causal_lm
+    RUNTIME DESTINATION samples_bin/
+    COMPONENT samples_bin
+    EXCLUDE_FROM_ALL)  # Installed only when the samples_bin component is requested explicitly.
diff --git a/samples/cpp/beam_search_causal_lm/README.md b/samples/cpp/beam_search_causal_lm/README.md
@@ -0,0 +1,50 @@
+# Text generation C++ sample that supports most popular models like LLaMA 2
+
+This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. It's only possible to change the device for inference to a different one, GPU for example, from the command line interface. The sample features `ov::genai::LLMPipeline` and configures it to use multiple beam groups. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+
+## Install OpenVINO
+
+Install [OpenVINO Archives >= 2024.2](https://docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.
+
+## Install OpenVINOGenAI
+
+Follow [../../../src/README.md](../../../src/README.md).
+
+## Download and convert the model and tokenizers
+
+The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
+
+#### Linux/macOS
+
+```sh
+source <INSTALL_DIR>/setupvars.sh
+python3 -m pip install --upgrade-strategy eager -r ../../requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+#### Windows
+
+```bat
+<INSTALL_DIR>\setupvars.bat
+python -m pip install --upgrade-strategy eager -r requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+## Run
+
+### Usage:
+`beam_search_causal_lm <MODEL_DIR> "<PROMPT>"`
+
+### Examples:
+
+#### Linux/MacOS:
+`./build/samples/cpp/beam_search_causal_lm/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"`
+
+#### Windows:
+`.\build\samples\cpp\beam_search_causal_lm\Release\beam_search_causal_lm .\TinyLlama-1.1B-Chat-v1.0\ "Why is the Sun yellow?"`
+
+To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+
+Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
+
+See [../../../src/README.md#supported-models](../../../src/README.md#supported-models) for the list of supported models.
diff --git a/...n/causal_lm/cpp/beam_search_causal_lm.cpp → ...earch_causal_lm/beam_search_causal_lm.cpp b/...n/causal_lm/cpp/beam_search_causal_lm.cpp → ...earch_causal_lm/beam_search_causal_lm.cpp
diff --git a/samples/cpp/chat_sample/CMakeLists.txt b/samples/cpp/chat_sample/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+find_package(OpenVINOGenAI REQUIRED PATHS
+    "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
+    ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
+)
+add_executable(chat_sample chat_sample.cpp)
+target_link_libraries(chat_sample PRIVATE openvino::genai)
+target_compile_features(chat_sample PRIVATE cxx_std_17)  # Build the sample with at least C++17.
+install(TARGETS chat_sample
+    RUNTIME DESTINATION samples_bin/
+    COMPONENT samples_bin
+    EXCLUDE_FROM_ALL)  # Installed only when the samples_bin component is requested explicitly.
diff --git a/samples/cpp/chat_sample/README.md b/samples/cpp/chat_sample/README.md
@@ -0,0 +1,50 @@
+# C++ chat_sample that supports most popular models like LLaMA 2
+
+This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `ov::genai::LLMPipeline` and configures it for the chat scenario. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+
+## Install OpenVINO
+
+Install [OpenVINO Archives >= 2024.2](https://docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.
+
+## Install OpenVINOGenAI
+
+Follow [../../../src/README.md](../../../src/README.md).
+
+## Download and convert the model and tokenizers
+
+The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
+
+#### Linux/macOS
+
+```sh
+source <INSTALL_DIR>/setupvars.sh
+python3 -m pip install --upgrade-strategy eager -r ../../requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+#### Windows
+
+```bat
+<INSTALL_DIR>\setupvars.bat
+python -m pip install --upgrade-strategy eager -r requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+## Run
+
+### Usage:
+`chat_sample <MODEL_DIR>`
+
+### Examples:
+
+#### Linux/MacOS:
+`./build/samples/cpp/chat_sample/chat_sample ./TinyLlama-1.1B-Chat-v1.0/`
+
+#### Windows:
+`.\build\samples\cpp\chat_sample\Release\chat_sample .\TinyLlama-1.1B-Chat-v1.0\`
+
+To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+
+Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
+
+See [../../../src/README.md#supported-models](../../../src/README.md#supported-models) for the list of supported models.
diff --git a/..._generation/causal_lm/cpp/chat_sample.cpp → samples/cpp/chat_sample/chat_sample.cpp b/..._generation/causal_lm/cpp/chat_sample.cpp → samples/cpp/chat_sample/chat_sample.cpp
diff --git a/samples/cpp/greedy_causal_lm/CMakeLists.txt b/samples/cpp/greedy_causal_lm/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+find_package(OpenVINOGenAI REQUIRED PATHS
+    "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
+    ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
+)
+add_executable(greedy_causal_lm greedy_causal_lm.cpp)
+target_link_libraries(greedy_causal_lm PRIVATE openvino::genai)
+target_compile_features(greedy_causal_lm PRIVATE cxx_std_17)  # Build the sample with at least C++17.
+install(TARGETS greedy_causal_lm
+    RUNTIME DESTINATION samples_bin/
+    COMPONENT samples_bin
+    EXCLUDE_FROM_ALL)  # Installed only when the samples_bin component is requested explicitly.
diff --git a/samples/cpp/greedy_causal_lm/README.md b/samples/cpp/greedy_causal_lm/README.md
@@ -0,0 +1,50 @@
+# Text generation C++ greedy_causal_lm that supports most popular models like LLaMA 2
+
+This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. It's only possible to change the device for inference to a different one, GPU for example, from the command line interface. The sample features `ov::genai::LLMPipeline` and configures it to run the simplest deterministic greedy sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+
+## Install OpenVINO
+
+Install [OpenVINO Archives >= 2024.2](https://docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.
+
+## Install OpenVINOGenAI
+
+Follow [../../../src/README.md](../../../src/README.md).
+
+## Download and convert the model and tokenizers
+
+The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
+
+#### Linux/macOS
+
+```sh
+source <INSTALL_DIR>/setupvars.sh
+python3 -m pip install --upgrade-strategy eager -r ../../requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+#### Windows
+
+```bat
+<INSTALL_DIR>\setupvars.bat
+python -m pip install --upgrade-strategy eager -r requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+## Run
+
+### Usage:
+`greedy_causal_lm <MODEL_DIR> "<PROMPT>"`
+
+### Examples:
+
+#### Linux/MacOS:
+`./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"`
+
+#### Windows:
+`.\build\samples\cpp\greedy_causal_lm\Release\greedy_causal_lm .\TinyLlama-1.1B-Chat-v1.0\ "Why is the Sun yellow?"`
+
+To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+
+Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
+
+See [../../../src/README.md#supported-models](../../../src/README.md#supported-models) for the list of supported models.
diff --git a/...ration/causal_lm/cpp/greedy_causal_lm.cpp → ...cpp/greedy_causal_lm/greedy_causal_lm.cpp b/...ration/causal_lm/cpp/greedy_causal_lm.cpp → ...cpp/greedy_causal_lm/greedy_causal_lm.cpp
diff --git a/samples/cpp/multinomial_causal_lm/CMakeLists.txt b/samples/cpp/multinomial_causal_lm/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+find_package(OpenVINOGenAI REQUIRED PATHS
+    "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
+    ${OpenVINO_DIR}  # GenAI may be installed alongside OpenVINO.
+)
+add_executable(multinomial_causal_lm multinomial_causal_lm.cpp)
+target_link_libraries(multinomial_causal_lm PRIVATE openvino::genai)
+target_compile_features(multinomial_causal_lm PRIVATE cxx_std_17)  # Request C++17 for this sample's own target.
+install(TARGETS multinomial_causal_lm
+    RUNTIME DESTINATION samples_bin/
+    COMPONENT samples_bin
+    EXCLUDE_FROM_ALL)  # Installed only when the samples_bin component is requested explicitly.
diff --git a/samples/cpp/multinomial_causal_lm/README.md b/samples/cpp/multinomial_causal_lm/README.md
@@ -0,0 +1,50 @@
+# Text generation C++ multinomial_causal_lm that supports most popular models like LLaMA 2
+
+This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. It's only possible to change the device for inference to a different one, GPU for example, from the command line interface. The sample features `ov::genai::LLMPipeline` and configures it to run a random sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+
+## Install OpenVINO
+
+Install [OpenVINO Archives >= 2024.2](https://docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.
+
+## Install OpenVINOGenAI
+
+Follow [../../../src/README.md](../../../src/README.md).
+
+## Download and convert the model and tokenizers
+
+The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
+
+#### Linux/macOS
+
+```sh
+source <INSTALL_DIR>/setupvars.sh
+python3 -m pip install --upgrade-strategy eager -r ../../requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+#### Windows
+
+```bat
+<INSTALL_DIR>\setupvars.bat
+python -m pip install --upgrade-strategy eager -r requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+## Run
+
+### Usage:
+`multinomial_causal_lm <MODEL_DIR> "<PROMPT>"`
+
+### Examples:
+
+#### Linux/MacOS:
+`./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"`
+
+#### Windows:
+`.\build\samples\cpp\multinomial_causal_lm\Release\multinomial_causal_lm .\TinyLlama-1.1B-Chat-v1.0\ "Why is the Sun yellow?"`
+
+To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+
+Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
+
+See [../../../src/README.md#supported-models](../../../src/README.md#supported-models) for the list of supported models.
diff --git a/...n/causal_lm/cpp/multinomial_causal_lm.cpp → ...omial_causal_lm/multinomial_causal_lm.cpp b/...n/causal_lm/cpp/multinomial_causal_lm.cpp → ...omial_causal_lm/multinomial_causal_lm.cpp
diff --git a/samples/cpp/prompt_lookup_decoding_lm/CMakeLists.txt b/samples/cpp/prompt_lookup_decoding_lm/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+if(TARGET openvino_tokenizers)
+    set(OPENVINO_TOKENIZERS_PATH $<TARGET_FILE:openvino_tokenizers>)  # Resolved at generate time to the built tokenizers library file.
+else()
+    message(FATAL_ERROR "prompt_lookup_decoding_lm must be compiled as part of OpenVINOGenAI to have the path to openvino_tokenizers hardcoded.")
+endif()
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+find_package(TBB REQUIRED COMPONENTS tbb)
+add_executable(prompt_lookup_decoding_lm prompt_lookup_decoding_lm.cpp)
+target_link_libraries(prompt_lookup_decoding_lm PRIVATE openvino::runtime TBB::tbb)
+target_compile_definitions(prompt_lookup_decoding_lm PRIVATE OPENVINO_TOKENIZERS_PATH="${OPENVINO_TOKENIZERS_PATH}")  # Pass the tokenizers path to the sample as a string macro.
+target_compile_features(prompt_lookup_decoding_lm PRIVATE cxx_std_17)
+install(TARGETS prompt_lookup_decoding_lm
+    RUNTIME DESTINATION samples_bin/
+    COMPONENT samples_bin
+    EXCLUDE_FROM_ALL)  # Installed only when the samples_bin component is requested explicitly.
diff --git a/samples/cpp/prompt_lookup_decoding_lm/README.md b/samples/cpp/prompt_lookup_decoding_lm/README.md
@@ -0,0 +1,52 @@
+# prompt_lookup_decoding_lm C++ sample that supports most popular models like LLaMA 2
+
+[Prompt Lookup decoding](https://github.com/apoorvumang/prompt-lookup-decoding) is an [assisted-generation](https://huggingface.co/blog/assisted-generation#understanding-text-generation-latency) technique where the draft model is replaced with simple string matching on the prompt to generate candidate token sequences. This method is highly effective for input-grounded generation (summarization, document QA, multi-turn chat, code editing), where there is high n-gram overlap between LLM input (prompt) and LLM output. This could be entity names, phrases, or code chunks that the LLM directly copies from the input while generating the output. Prompt lookup exploits this pattern to speed up autoregressive decoding in LLMs. This results in significant speedups with no effect on output quality.
+
+This example showcases inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+
+## Install OpenVINO
+
+Install [OpenVINO Archives >= 2024.2](https://docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.
+
+## Install OpenVINOGenAI
+
+Follow [../../../src/README.md](../../../src/README.md).
+
+## Download and convert the model and tokenizers
+
+The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
+
+#### Linux/macOS
+
+```sh
+source <INSTALL_DIR>/setupvars.sh
+python3 -m pip install --upgrade-strategy eager -r ../../requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+#### Windows
+
+```bat
+<INSTALL_DIR>\setupvars.bat
+python -m pip install --upgrade-strategy eager -r requirements.txt
+optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+```
+
+## Run
+
+### Usage:
+`prompt_lookup_decoding_lm <MODEL_DIR> "<PROMPT>"`
+
+### Examples:
+
+#### Linux/MacOS:
+`./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0;"`
+
+#### Windows:
+`.\build\samples\cpp\prompt_lookup_decoding_lm\Release\prompt_lookup_decoding_lm .\TinyLlama-1.1B-Chat-v1.0\ "return 0;"`
+
+To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+
+Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
+
+See [../../../src/README.md#supported-models](../../../src/README.md#supported-models) for the list of supported models.
diff --git a/...usal_lm/cpp/prompt_lookup_decoding_lm.cpp → ...decoding_lm/prompt_lookup_decoding_lm.cpp b/...usal_lm/cpp/prompt_lookup_decoding_lm.cpp → ...decoding_lm/prompt_lookup_decoding_lm.cpp
@@ -1,6 +1,7 @@
 // Copyright (C) 2023-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
+#include <string_view>
 #include <openvino/core/parallel.hpp>
 #include <openvino/openvino.hpp>
 

diff --git a/...generation/causal_lm/cpp/requirements.txt → samples/cpp/requirements.txt b/...generation/causal_lm/cpp/requirements.txt → samples/cpp/requirements.txt