diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 80089a4e8..5a96741b5 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -191,6 +191,8 @@ jobs:
 
   cpp-greedy_causal_lm-windows:
     runs-on: windows-latest
+    env:
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
@@ -626,6 +628,8 @@ jobs:
 
   cpp-continuous-batching-windows:
     runs-on: windows-latest
+    env:
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/.github/workflows/genai_package.yml b/.github/workflows/genai_package.yml
index 06e589dfb..9e439eb11 100644
--- a/.github/workflows/genai_package.yml
+++ b/.github/workflows/genai_package.yml
@@ -80,6 +80,7 @@ jobs:
     runs-on: windows-latest
     env:
       CMAKE_BUILD_PARALLEL_LEVEL: null
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml
index 29ceda216..38552174b 100644
--- a/.github/workflows/genai_python_lib.yml
+++ b/.github/workflows/genai_python_lib.yml
@@ -62,6 +62,7 @@ jobs:
     runs-on: windows-latest
     env:
       CMAKE_BUILD_PARALLEL_LEVEL: null
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/samples/cpp/beam_search_causal_lm/README.md b/samples/cpp/beam_search_causal_lm/README.md
index a10428891..201a9b5ce 100644
--- a/samples/cpp/beam_search_causal_lm/README.md
+++ b/samples/cpp/beam_search_causal_lm/README.md
@@ -18,6 +18,7 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 `beam_search_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
 To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+Also, you can enable UTF-8 output by setting the environment variable `PYTHONIOENCODING` to `utf8`.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
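
For reference, a minimal sketch of how the added variable takes effect when running the sample manually from Windows cmd. The sample name and model directory are taken from the README's own example; `set` and `REM` are standard cmd syntax, not something introduced by this patch:

```bat
REM Force UTF-8 for Python's standard streams, matching the env block the
REM workflows above add; this affects the Python-based scripts the CI runs.
set PYTHONIOENCODING=utf8
beam_search_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"
```

In the workflow files themselves, placing `PYTHONIOENCODING` in a job-level `env:` block applies it to every step of that job, so each Windows job gets UTF-8 stream encoding without repeating the `set` in individual steps.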