enable #60

Closed · wants to merge 41 commits

Changes from all commits (41 commits):
- 5eb59ea: add tests (pavel-esir, May 16, 2024)
- ce4eb00: Apply suggestions from code review (pavel-esir, May 22, 2024)
- aa90e9d: names correction (pavel-esir, May 22, 2024)
- d843229: enable (Wovchena, May 22, 2024)
- 2c1d1ef: libtbb-dev (Wovchena, May 22, 2024)
- 57ca2d4: move (Wovchena, May 22, 2024)
- 37844c9: slash (Wovchena, May 22, 2024)
- 5cff21e: install (Wovchena, May 22, 2024)
- 561b55a: core_genai_dev (Wovchena, May 22, 2024)
- 260d913: remove export (Wovchena, May 22, 2024)
- 54cbb52: update URL_HASH (Wovchena, May 22, 2024)
- 82a9449: remove submodules from .gitmodules (Wovchena, May 22, 2024)
- 5a0079b: install openvino_tokenizers for genai_python_lib (pavel-esir, May 22, 2024)
- 73e4312: Update Jinja2Cpp fork commit (Wovchena, May 22, 2024)
- 75b7c37: remove group_beam_searcher.hpp; copy fast_tokenizer (pavel-esir, May 22, 2024)
- b6cf954: rreorganaise components (Wovchena, May 22, 2024)
- aaf5c78: add SOVERSION, and requirements-build.txt (Wovchena, May 22, 2024)
- 5537d3b: repalce SKBUILD with EXCLUDE_FROM_ALL because the effect is the same (Wovchena, May 22, 2024)
- 9966be4: fix NAMELINK_COMPONENT (Wovchena, May 22, 2024)
- 2486e53: remove extraline (Wovchena, May 22, 2024)
- 7953c0f: Merge branch 'generate_pipeline' into fix-archive (Wovchena, May 22, 2024)
- 786eac7: add soft restrictions (Wovchena, May 22, 2024)
- 7324da9: Fix build to unblock packaging (Wovchena, May 22, 2024)
- 5577e84: improve naming (Wovchena, May 23, 2024)
- b679fc7: install samples (Wovchena, May 23, 2024)
- 26f9fe1: remove quotes (Wovchena, May 23, 2024)
- 1dcd40b: use main target name because an alias can't be specified in cmake --t… (Wovchena, May 23, 2024)
- 8c00ccb: define CMAKE_BUILD_PARALLEL_LEVEL (Wovchena, May 23, 2024)
- 61fba58: Ensure ./requirements-build.txt won't outdate (Wovchena, May 23, 2024)
- d78fa3b: Use ./requirements-build.txt in python lib build (Wovchena, May 23, 2024)
- 757b738: Add missing && (Wovchena, May 23, 2024)
- 51ace23: Test Debug (Wovchena, May 23, 2024)
- e53c525: add matrix for windows_genai_package (Wovchena, May 23, 2024)
- 73ac7b1: openvino_tokenizers from form (Wovchena, May 23, 2024)
- e7e50cb: update openvino_tokenizers (Wovchena, May 23, 2024)
- 3339407: update openvino_tokenizers (Wovchena, May 23, 2024)
- 9b5b915: update openvino_tokenizers (Wovchena, May 23, 2024)
- 1fe85b9: revert openvino_tokenizers (Wovchena, May 23, 2024)
- 7e23930: tokenizers from fork (Wovchena, May 23, 2024)
- 62f5e34: update tokenizers (Wovchena, May 23, 2024)
- 63262d7: centos7_2024.2.0.dev (Wovchena, May 23, 2024)
4 changes: 4 additions & 0 deletions .github/dependabot.yml
@@ -1,5 +1,9 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "./"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "image_generation/stable_diffusion_1_5/cpp/scripts/"
schedule:
4 changes: 2 additions & 2 deletions .github/workflows/causal_lm_cpp.yml
@@ -194,8 +194,8 @@ jobs:
shell: cmd
run: |
call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat

.\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
.\build\text_generation\causal_lm\cpp\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt

echo import transformers > ref.py
echo predictions = open('pred.txt', 'r').read() >> ref.py
echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
40 changes: 28 additions & 12 deletions .github/workflows/genai_package.yml
@@ -2,7 +2,9 @@ name: genai_package
on: pull_request
jobs:
ubuntu_genai_package:
if: false
strategy:
matrix:
build-type: [Release, Debug]
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
@@ -12,18 +14,27 @@ jobs:
with:
python-version: 3.8
- run: mkdir ./ov/
- run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
- run: curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15454-0d95325972f/l_openvino_toolkit_centos7_2024.2.0.dev20240522_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
- run: sudo ./ov/install_dependencies/install_openvino_dependencies.sh
- run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release --target package -j
- run: source ./ov/setupvars.sh && cmake --install ./build/ --config Release --prefix ov
- run: ov/samples/cpp/build_samples.sh -b "${{ github.workspace }}/s pace"
- run: sudo apt-get install libtbb-dev
- run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
- run: source ./ov/setupvars.sh && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
- run: source ./ov/setupvars.sh && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix ov
- run: ov/samples/cpp/build_samples.sh -i ${{ github.workspace }}/s\ pace
if: ${{ 'Release' == matrix.build-type }} # build_samples enforces Release build
- run: source ./ov/setupvars.sh && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
if: ${{ 'Release' == matrix.build-type }}
- run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
if: ${{ 'Release' == matrix.build-type }}
- run: source ./ov/setupvars.sh && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- run: source ./ov/setupvars.sh && timeout 50s "${{ github.workspace }}/s pace/intel64/Release/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
if: ${{ 'Release' == matrix.build-type }}
- run: source ./ov/setupvars.sh && timeout 50s ${{ github.workspace }}/s\ pace/samples_bin/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ ""
if: ${{ 'Release' == matrix.build-type }}

windows_genai_package:
strategy:
matrix:
build-type: [Release, Debug]
runs-on: windows-latest
defaults:
run:
@@ -37,11 +48,16 @@ jobs:
python-version: 3.8
- run: curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.2.0-15349-765302e0de1/w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64.zip
- run: unzip ov.zip
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release --target package -j
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --install ./build/ --config Release --prefix w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\samples\cpp\build_samples_msvc.bat -b "${{ github.workspace }}/samples_build"
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} -S ./ -B ./build/
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config ${{ matrix.build-type }} --target package -j
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --install ./build/ --config ${{ matrix.build-type }} --prefix w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\samples\cpp\build_samples_msvc.bat -i "${{ github.workspace }}/samples_install"
if: ${{ 'Release' == matrix.build-type }} # build_samples enforces Release build
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
if: ${{ 'Release' == matrix.build-type }}
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
if: ${{ 'Release' == matrix.build-type }}
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && "${{ github.workspace }}/samples_build/intel64/Release/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
if: ${{ 'Release' == matrix.build-type }}
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && "${{ github.workspace }}/samples_install/samples_bin/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
if: ${{ 'Release' == matrix.build-type }}
28 changes: 22 additions & 6 deletions .github/workflows/genai_python_lib.yml
@@ -16,9 +16,22 @@ jobs:
- run: source ./ov/setupvars.sh && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
- run: python -m pip install --pre openvino --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly # Can't load CentOS libraries from the archive
- run: PYTHONPATH=./src/python/ python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
- run: source ./ov/setupvars.sh && python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- run: python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
# GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
# build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
- run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt
- run: PYTHONPATH=./src/python/ python -c "from openvino_genai import LLMPipeline"
- run: source ./ov/setupvars.sh && CMAKE_BUILD_PARALLEL_LEVEL="" python -m pip install --pre . --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- run: python -c "from openvino_genai import LLMPipeline"
- name: GenAI Python API tests
run: |
source ./ov/setupvars.sh
cd ./tests/python_tests/
python -m pip install -r requirements.txt
models=$(python list_test_models.py)
echo "$models" | while read -r model_name model_path; do
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model "$model_name" "$model_path"
done
python -m pytest test_generate_api.py
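
For reference, a minimal sketch of the contract this step relies on: `list_test_models.py` (contents hypothetical, inferred from the `while read -r model_name model_path` loop above) prints one `model_name model_path` pair per line for the shell loop to consume.

```python
# Hypothetical sketch of list_test_models.py, inferred from the workflow
# step above; the real file may differ. It prints one
# "model_name model_path" pair per line.
models = [
    ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama-1.1B-Chat-v1.0"),
]

if __name__ == "__main__":
    for model_name, model_path in models:
        print(model_name, model_path)
```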

windows_genai_python_lib:
runs-on: windows-latest
@@ -37,6 +50,9 @@ jobs:
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
- run: python -m pip install "numpy<1.27"
- run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai.py_generate_pipeline import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
- run: python -c "from openvino_genai.py_generate_pipeline import LLMPipeline"
# GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
# build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
- run: call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt
- run: set "PYTHONPATH=./src/python;" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -c "from openvino_genai import LLMPipeline" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
- run: set CMAKE_BUILD_PARALLEL_LEVEL="" && call w_openvino_toolkit_windows_2024.2.0.dev20240515_x86_64\setupvars.bat && python -m pip install .
- run: python -c "from openvino_genai import LLMPipeline"
6 changes: 0 additions & 6 deletions .gitmodules
@@ -1,9 +1,3 @@
[submodule "thirdparty/openvino_tokenizers"]
path = thirdparty/openvino_tokenizers
url = https://github.com/openvinotoolkit/openvino_tokenizers.git
[submodule "thirdparty/nlohmann_json"]
path = thirdparty/nlohmann_json
url = https://github.com/nlohmann/json.git
[submodule "thirdparty/Jinja2Cpp"]
path = thirdparty/Jinja2Cpp
url = https://github.com/jinja2cpp/Jinja2Cpp
15 changes: 11 additions & 4 deletions CMakeLists.txt
@@ -4,16 +4,23 @@

cmake_minimum_required(VERSION 3.15)

set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMake build type")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
# Multi config generators such as Visual Studio ignore CMAKE_BUILD_TYPE. Multi config generators are configured with
# CMAKE_CONFIGURATION_TYPES, but limiting options in it completely removes such build options
get_property(GENERATOR_IS_MULTI_CONFIG_VAR GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT GENERATOR_IS_MULTI_CONFIG_VAR AND NOT DEFINED CMAKE_BUILD_TYPE)
message(STATUS "CMAKE_BUILD_TYPE is not defined, 'Release' will be used")
# Setting CMAKE_BUILD_TYPE as CACHE must go before project(). Otherwise project() sets its value and set() doesn't take an effect
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel ...")
endif()

project(openvino_genai VERSION 2024.2.0.0)
project(OpenVINOGenAI VERSION 2024.2.0.0)

add_subdirectory(./thirdparty/openvino_tokenizers/ "${CMAKE_CURRENT_BINARY_DIR}/openvino_tokenizers/")
add_subdirectory(src)
add_subdirectory(text_generation/causal_lm/cpp)

install(DIRECTORY text_generation/causal_lm/cpp/ DESTINATION samples/cpp/causal_lm COMPONENT cpp_samples_genai)
install(FILES LICENSE third-party-programs.txt DESTINATION licensing_genai COMPONENT licensing_genai) # TODO: how to merge with OPenvino
install(FILES LICENSE DESTINATION licensing COMPONENT licensing_genai RENAME LICENSE-GENAI)
install(FILES third-party-programs.txt DESTINATION licensing COMPONENT licensing_genai RENAME third-party-programs-genai.txt)
set(CPACK_GENERATOR "ZIP")
include(CPack)
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -23,7 +23,7 @@ dependencies = [
cmake.source-dir = "./"
cmake.build-type = "Release"
cmake.targets = ["py_generate_pipeline", "genai"]
install.components = ["core_genai", "pygenai"]
install.components = ["wheel_genai"]
sdist.cmake = true
wheel.packages = ["src/python/openvino_genai"]
wheel.install-dir = "openvino_genai"
2 changes: 2 additions & 0 deletions requirements-build.txt
@@ -0,0 +1,2 @@
cmake~=3.23
build~=1.2.1
92 changes: 52 additions & 40 deletions ...causal_lm/cpp/generate_pipeline/README.md → src/README.md
@@ -2,27 +2,41 @@

## Usage

Firs of all you need to convert your model with optimum-cli
First of all you need to convert your model with optimum-cli
``` sh
optimum-cli export openvino --model "TinyLlama/TinyLlama-1.1B-Chat-v1.0" --weight-format fp16 --trust-remote-code "TinyLlama-1.1B-Chat-v1.0"
pip install openvino-genai
```

LLMPipeline is the main object used for decoding. You can initialize it straight away from the folder with the converted model. It will automatically load the main model, tokenizer, detokenizer and the default generation configuration.

### In Python
### Python

A minimalist example:
```python
import py_generate_pipeline as genai # set more friendly module name
pipe = genai.LLMPipeline(model_path, "CPU")
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
print(pipe.generate("The Sun is yellow because"))
```

Calling generate with custom generation config parameters, e.g. a config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")

res = pipe.generate("The Sun is yellow because", max_new_tokens=30, num_groups=3, group_size=5)
print(res)
```

output:
```
'it is made up of carbon atoms. The carbon atoms are arranged in a linear pattern, which gives the yellow color. The arrangement of carbon atoms in'
```

A simple chat in Python:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path)

config = {'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1}
pipe.set_generation_config(config)
@@ -39,60 +53,45 @@ pipe.finish_chat()
```
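
The body of the loop is collapsed in this view; below is a minimal sketch of a complete chat loop under the same API. The `start_chat()` name is an assumption, inferred as the counterpart of the `finish_chat()` call shown above.

```python
# Minimal chat-loop sketch; assumes start_chat()/finish_chat() bracket a
# conversation, as suggested by the finish_chat() call above.
import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(model_path)
pipe.set_generation_config({'num_groups': 3, 'group_size': 5, 'diversity_penalty': 1.1})

pipe.start_chat()  # assumed counterpart of finish_chat()
while True:
    prompt = input('question:\n')
    if prompt == 'Stop!':
        break
    print(pipe.generate(prompt))
pipe.finish_chat()
```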

A test to compare outputs with Hugging Face:
```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

max_new_tokens = 32
prompt = 'table is made of'

encoded_prompt = tokenizer.encode(prompt, return_tensors='pt', add_special_tokens=False)
hf_encoded_output = model.generate(encoded_prompt, max_new_tokens=max_new_tokens, do_sample=False)
hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:])
print(f'hf_output: {hf_output}')

import sys
sys.path.append('build-Debug/')
import py_generate_pipeline as genai  # give the module a friendlier name

pipe = genai.LLMPipeline('text_generation/causal_lm/TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/')
ov_output = pipe(prompt, max_new_tokens=max_new_tokens)
print(f'ov_output: {ov_output}')

assert hf_output == ov_output

```

### In C++
### C++

A minimalist example:
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
cout << pipe.generate("The Sun is yellow because");
std::cout << pipe.generate("The Sun is yellow because");
}
```

Using Group Beam Search Decoding
```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");

ov::GenerationConfig config = pipe.get_generation_config();
config.max_new_tokens = 256;
config.num_groups = 3;
config.group_size = 5;
config.diversity_penalty = 1.0f;

cout << pipe.generate("The Sun is yellow because", config);
std::cout << pipe.generate("The Sun is yellow because", config);
}
```

A simple chat in C++:
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
std::string prompt;

@@ -142,24 +141,38 @@ int main(int argc, char* argv[]) {
Streaming example with a lambda function

``` cpp
int main(int argc, char* argv[]) {
auto streamer = [](std::string word) { std::cout << word << std::flush; };

#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
cout << pipe.generate("The Sun is yellow because", streamer);

auto streamer = [](std::string word) { std::cout << word << std::flush; };
std::cout << pipe.generate("The Sun is yellow because", streamer);
}
```

Streaming with custom class
``` cpp
#include <streamer_base.hpp>
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

class CustomStreamer: public StreamerBase {
public:
void put(int64_t token) {/* decode tokens and process them */};

void end() {/* finalize */};
void put(int64_t token) {
/* custom decoding/tokens processing code
tokens_cache.push_back(token);
std::string text = m_tokenizer.decode(tokens_cache);
...
*/
};

void end() {
/* custom finalization */
};
};

int main(int argc, char* argv[]) {
@@ -170,4 +183,3 @@ int main(int argc, char* argv[]) {
std::cout << pipe.generate("The Sun is yellow because", custom_streamer);
}
```
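
A Python counterpart of the streaming examples, as a sketch only: it assumes the Python binding accepts a callable streamer the way the C++ API accepts a lambda, which this diff does not show.

```python
# Sketch only: assumes pipe.generate() accepts a streamer callable,
# mirroring the C++ lambda example above; the keyword name is an assumption.
import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(model_path, "CPU")
streamer = lambda word: print(word, end='', flush=True)
pipe.generate("The Sun is yellow because", streamer=streamer)
```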
