From 56eeafcd1117b7a57ff396f96a6656e97d2bb6bd Mon Sep 17 00:00:00 2001
From: Yaroslav Tarkan
Date: Wed, 24 Jul 2024 10:50:02 +0300
Subject: [PATCH] [2024.3] Fix symbol encode error (#629)

Symbols that cause errors:
- `\u0643`
- `\u25aa`
---
 .github/workflows/causal_lm_cpp.yml | 4 ++++
 .github/workflows/genai_package.yml | 1 +
 .github/workflows/genai_python_lib.yml | 1 +
 samples/cpp/beam_search_causal_lm/README.md | 14 +++++++++++++-
 samples/cpp/chat_sample/README.md | 14 +++++++++++++-
 samples/cpp/greedy_causal_lm/README.md | 14 +++++++++++++-
 samples/cpp/multinomial_causal_lm/README.md | 14 +++++++++++++-
 samples/cpp/prompt_lookup_decoding_lm/README.md | 14 +++++++++++++-
 samples/cpp/speculative_decoding_lm/README.md | 14 +++++++++++++-
 samples/python/beam_search_causal_lm/README.md | 14 +++++++++++++-
 samples/python/chat_sample/README.md | 14 +++++++++++++-
 samples/python/greedy_causal_lm/README.md | 14 +++++++++++++-
 samples/python/multinomial_causal_lm/README.md | 14 +++++++++++++-
 13 files changed, 136 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 85bef624c8..527259f203 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -191,6 +191,8 @@ jobs:
 
   cpp-greedy_causal_lm-windows:
     runs-on: windows-latest
+    env:
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
@@ -626,6 +628,8 @@ jobs:
 
   cpp-continuous-batching-windows:
     runs-on: windows-latest
+    env:
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/.github/workflows/genai_package.yml b/.github/workflows/genai_package.yml
index 2535e423d9..cf604b4bcc 100644
--- a/.github/workflows/genai_package.yml
+++ b/.github/workflows/genai_package.yml
@@ -80,6 +80,7 @@ jobs:
     runs-on: windows-latest
     env:
       CMAKE_BUILD_PARALLEL_LEVEL: null
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml
index e0c43bddd5..257a9c2f57 100644
--- a/.github/workflows/genai_python_lib.yml
+++ b/.github/workflows/genai_python_lib.yml
@@ -63,6 +63,7 @@ jobs:
     runs-on: windows-latest
     env:
       CMAKE_BUILD_PARALLEL_LEVEL: null
+      PYTHONIOENCODING: "utf8"
     defaults:
       run:
         shell: cmd
diff --git a/samples/cpp/beam_search_causal_lm/README.md b/samples/cpp/beam_search_causal_lm/README.md
index 82232c42f6..0d2ee83bfc 100644
--- a/samples/cpp/beam_search_causal_lm/README.md
+++ b/samples/cpp/beam_search_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `beam_search_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/cpp/chat_sample/README.md b/samples/cpp/chat_sample/README.md
index 8a24b20005..a2eccb4d3d 100644
--- a/samples/cpp/chat_sample/README.md
+++ b/samples/cpp/chat_sample/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `chat_sample TinyLlama-1.1B-Chat-v1.0`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/cpp/greedy_causal_lm/README.md b/samples/cpp/greedy_causal_lm/README.md
index c0a7d5f3c4..79852e0d10 100644
--- a/samples/cpp/greedy_causal_lm/README.md
+++ b/samples/cpp/greedy_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `greedy_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/cpp/multinomial_causal_lm/README.md b/samples/cpp/multinomial_causal_lm/README.md
index 4478579919..21c9a07e77 100644
--- a/samples/cpp/multinomial_causal_lm/README.md
+++ b/samples/cpp/multinomial_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `multinomial_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/cpp/prompt_lookup_decoding_lm/README.md b/samples/cpp/prompt_lookup_decoding_lm/README.md
index 89a5e2c585..c5517c5bf6 100644
--- a/samples/cpp/prompt_lookup_decoding_lm/README.md
+++ b/samples/cpp/prompt_lookup_decoding_lm/README.md
@@ -20,8 +20,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0;"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/cpp/speculative_decoding_lm/README.md b/samples/cpp/speculative_decoding_lm/README.md
index c86bd8b617..644ebd2c94 100644
--- a/samples/cpp/speculative_decoding_lm/README.md
+++ b/samples/cpp/speculative_decoding_lm/README.md
@@ -24,8 +24,20 @@ optimum-cli export openvino --trust-remote-code --model meta-llama/Llama-2-7b-ch
 
 `speculative_decoding_lm TinyLlama-1.1B-Chat-v1.0 Llama-2-7b-chat-hf "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/python/beam_search_causal_lm/README.md b/samples/python/beam_search_causal_lm/README.md
index 5e80aa69da..7e412db379 100644
--- a/samples/python/beam_search_causal_lm/README.md
+++ b/samples/python/beam_search_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `beam_search_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/python/chat_sample/README.md b/samples/python/chat_sample/README.md
index 983789d0eb..aee8783b5f 100644
--- a/samples/python/chat_sample/README.md
+++ b/samples/python/chat_sample/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `chat_sample.py TinyLlama-1.1B-Chat-v1.0`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/python/greedy_causal_lm/README.md b/samples/python/greedy_causal_lm/README.md
index 97b044eb51..1f0eb333ea 100644
--- a/samples/python/greedy_causal_lm/README.md
+++ b/samples/python/greedy_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `greedy_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
diff --git a/samples/python/multinomial_causal_lm/README.md b/samples/python/multinomial_causal_lm/README.md
index d39142f3de..0778868e6a 100644
--- a/samples/python/multinomial_causal_lm/README.md
+++ b/samples/python/multinomial_causal_lm/README.md
@@ -17,8 +17,20 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B
 
 `multinomial_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"`
 
-To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
 
 Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU.
 
 See https://github.com/openvinotoolkit/openvino.genai/blob/master/src/README.md#supported-models for the list of supported models.
+
+### Troubleshooting
+
+#### Unicode characters encoding error on Windows
+
+Example error:
+```
+UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
+```
+
+If you encounter the error described in the example when the sample is printing output to the Windows console, it is likely due to the default Windows encoding not supporting certain Unicode characters. To resolve this:
+1. Enable Unicode characters for Windows cmd: open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot.
+2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.
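
For reference, a minimal Python sketch of the failure these changes guard against and of the `PYTHONIOENCODING` workaround. It assumes (this is an assumption, not something exercised by the patch itself) a Windows console whose stdout uses a legacy code page such as cp1252, which cannot represent characters like `\u25aa` or `\u0643`:

```python
# Minimal reproduction sketch for the encoding error addressed by this patch.
# Assumption: a Windows console whose stdout uses a legacy code page such as
# cp1252, which has no mapping for characters like '\u25aa' or '\u0643'.
import sys

print("stdout encoding:", sys.stdout.encoding)  # e.g. 'cp1252' on affected consoles

try:
    print("\u25aa \u0643")  # raises UnicodeEncodeError when stdout is not UTF-8
except UnicodeEncodeError as err:
    print("caught:", err)   # the exception text itself is ASCII-safe

# Workarounds mirrored by the workflow and README changes above:
#   cmd:        set PYTHONIOENCODING=utf8
#   PowerShell: $env:PYTHONIOENCODING = "utf8"
# or enable "Beta: Use Unicode UTF-8 for worldwide language support" and reboot.
```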