
Clean up VLMPipeline #923

Merged 229 commits on Oct 9, 2024
8d793c5
Create /chatglm_cpp
wenyi5608 Nov 29, 2023
212de8b
Delete llm/chatglm_cpp
wenyi5608 Nov 29, 2023
669dd53
Create CMakeLists.txt
wenyi5608 Nov 29, 2023
c04d1c7
Update CMakeLists.txt
wenyi5608 Nov 29, 2023
6847898
Add files via upload
wenyi5608 Nov 29, 2023
9a368bb
add chatglm_cpp demo
wenyi5608 Nov 29, 2023
b9fe37f
Update README.md
wenyi5608 Nov 29, 2023
a2bbd4a
Update chatglm.cpp
wenyi5608 Nov 29, 2023
0b480c9
Update README.md
wenyi5608 Dec 1, 2023
5eb8c4a
Update model compile from xml
wenyi5608 Dec 7, 2023
995c82d
Update README.md
wenyi5608 Dec 7, 2023
0823c6c
Update README.md
wenyi5608 Dec 7, 2023
8d70a45
Update README.md
wenyi5608 Dec 7, 2023
d74fddd
Update README.md
wenyi5608 Dec 13, 2023
cafdec4
Update README.md
wenyi5608 Dec 13, 2023
3319533
Update chatglm.cpp
wenyi5608 Dec 13, 2023
417555f
Update chatglm.cpp
wenyi5608 Dec 13, 2023
a244f9b
Update chatglm.cpp
wenyi5608 Dec 13, 2023
20931c6
Update chatglm.cpp
wenyi5608 Dec 14, 2023
5c09800
Update README.md
wenyi5608 Dec 14, 2023
85a3945
Update README.md
wenyi5608 Dec 14, 2023
6af1acd
Update chatglm.cpp
wenyi5608 Dec 15, 2023
fefc606
Add files via upload
wenyi5608 Dec 15, 2023
36ca098
Update chatglm.cpp
wenyi5608 Dec 15, 2023
c45b85c
Update README.md
wenyi5608 Dec 15, 2023
b2909b8
Update README.md
wenyi5608 Dec 15, 2023
67b68f5
Update README.md
wenyi5608 Dec 15, 2023
dd60a8f
Update README.md
wenyi5608 Dec 15, 2023
035a7d6
Update chatglm.cpp
wenyi5608 Dec 15, 2023
7b6f607
Update chatglm.cpp
wenyi5608 Dec 20, 2023
ba888bb
Update chatglm.cpp
wenyi5608 Dec 27, 2023
673d86e
Update README.md
wenyi5608 Dec 27, 2023
c30bdf8
Update chatglm.cpp
wenyi5608 Dec 27, 2023
fcadd45
Update README.md
wenyi5608 Dec 27, 2023
35a9d66
Update chatglm.cpp
wenyi5608 Dec 29, 2023
57b9f68
Update chatglm.cpp
wenyi5608 Jan 23, 2024
1703821
Create clip.h
wenyi5608 Jul 26, 2024
8fd2fe7
minicmpv_2 demo
wenyi5608 Jul 26, 2024
2dda201
Update submodul
yangsu2022 Jul 26, 2024
59b394f
Add minicpmv2
wenyi5608 Jul 26, 2024
392dd62
multi-round chat
wenyi5608 Aug 11, 2024
c4f9a19
multi-round chat
wenyi5608 Aug 11, 2024
ae735a8
multi-round chat
wenyi5608 Aug 11, 2024
4879efc
Update minicpmv.h
wenyi5608 Aug 11, 2024
6507280
multi-round chat
wenyi5608 Aug 11, 2024
9b67765
Add files via upload
wenyi5608 Aug 11, 2024
f10989f
Update README.md
wenyi5608 Aug 11, 2024
a20db15
Update minicpmv_ov.cpp
wenyi5608 Aug 11, 2024
5c4f254
using namespace ov
Wovchena Aug 12, 2024
bbefc17
Merge branch 'wenyi5608-stateful' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 12, 2024
73db9a6
Add VLMPipeline
Wovchena Aug 12, 2024
8d0ad82
Add python
Wovchena Aug 13, 2024
4bc8a6d
Add constructor
Wovchena Aug 13, 2024
bea9a46
Move ModelConfig
Wovchena Aug 14, 2024
08ad5a3
generate(imgage)
Wovchena Aug 14, 2024
9dbf0e0
rename
Wovchena Aug 15, 2024
448a35e
Tensor
Wovchena Aug 15, 2024
8f02b9f
Remove output_fixed_len
Wovchena Aug 15, 2024
ceba7c9
merge generate
Wovchena Aug 15, 2024
6420a39
Add Clip
Wovchena Aug 15, 2024
6f5c256
remove n_threads
Wovchena Aug 16, 2024
fb2dcd0
VisionEncoder
Wovchena Aug 16, 2024
6b2aaa3
tensor
Wovchena Aug 16, 2024
f54e458
tensor
Wovchena Aug 16, 2024
4611834
tensor
Wovchena Aug 19, 2024
45d2f85
tensor
Wovchena Aug 19, 2024
431be5f
drop n_img_pos
Wovchena Aug 19, 2024
93d3eba
Split encoder and resampler
Wovchena Aug 19, 2024
1b2bc1e
Add encode for VisionEncoder
Wovchena Aug 19, 2024
0ea4c09
clean up
Wovchena Aug 19, 2024
18fa984
Merge branch 'master' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 20, 2024
3e4af2c
Remove int4
Wovchena Aug 21, 2024
02c79fc
workaround
Wovchena Aug 21, 2024
1124ddf
fix
Wovchena Aug 21, 2024
79e3e04
public
Wovchena Aug 21, 2024
3edfd91
workaround
Wovchena Aug 21, 2024
b5f04fc
x and y ratios
Wovchena Aug 21, 2024
2514ae2
Merge branch 'master' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 21, 2024
066ff0c
Merge branch 'master' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 21, 2024
5785cb2
Delete resize from ov_minicpm-v2-test.py
Wovchena Aug 21, 2024
0d28a64
resample
Wovchena Aug 22, 2024
c18219b
Update llm/mincpmv2_cpp/export_MiniCPM-V-2.py
Wovchena Aug 22, 2024
db8330a
ref
Wovchena Aug 23, 2024
b176c26
fix dims
Wovchena Aug 23, 2024
7c5f631
Batch inference
Wovchena Aug 26, 2024
7972bc2
Hide headers
Wovchena Aug 26, 2024
800bcc6
Return string
Wovchena Aug 26, 2024
15dbbaf
Remove
Wovchena Aug 26, 2024
7b4713c
Factor vision_encoder.hpp out
Wovchena Aug 28, 2024
e3dd4f6
Merge branch 'master' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 28, 2024
2d1d6b7
Update README.md
Wovchena Aug 28, 2024
8a9f4b9
fix warnings
Wovchena Aug 28, 2024
b51b981
Encoder AnyMap
Wovchena Aug 28, 2024
3fce7c7
Reproduce
Wovchena Aug 29, 2024
f5eb2b3
Remove members
Wovchena Aug 29, 2024
9c18ff9
relative link
Wovchena Aug 29, 2024
16d9185
relative link
Wovchena Aug 29, 2024
f6cc888
vlm_config.hpp
Wovchena Aug 29, 2024
19381a0
Add generate methods
Wovchena Aug 29, 2024
914293f
Add vlm_config
Wovchena Aug 29, 2024
dd30ca0
m_
Wovchena Aug 29, 2024
8a155b5
Merge branch 'master' into add-openbmb/MiniCPM-V-2-sample
Wovchena Aug 29, 2024
8e1819f
[GHA] Update OV refs to 2024.4
akladiev Aug 30, 2024
f169ec8
Remove unused
Wovchena Aug 30, 2024
21110e2
Add overloads
Wovchena Aug 30, 2024
4cfba8a
[GHA] Update OV refs to 2024.4 (#810)
Wovchena Aug 30, 2024
0052922
Tensor->EncodedImage
Wovchena Sep 2, 2024
86187dd
Add VisionEncoder docstrings
Wovchena Sep 2, 2024
4d79fda
public
Wovchena Sep 2, 2024
52a7c12
Update src/cpp/src/vision_encoder.cpp
Wovchena Sep 3, 2024
278ff39
Document shapes
Wovchena Sep 4, 2024
94a2f3f
Remove generate(AnyMap)
Wovchena Sep 4, 2024
675c8f0
Beautify sample
Wovchena Sep 4, 2024
b79874b
fix PerfMetrics
pavel-esir Sep 4, 2024
b4955f5
TTTFT -> TTFT
pavel-esir Sep 4, 2024
c3f895c
Merge branch 'releases/2024/4' into add-openbmb/MiniCPM-V-2-sample
Wovchena Sep 4, 2024
94573f3
Replace stable diffusion 1.5 model
yatarkan Sep 4, 2024
88c100c
std::move
Wovchena Sep 4, 2024
9a737f2
add tests
pavel-esir Sep 4, 2024
8ccfdf7
Change Stable Diffusion v1.5 model (#824)
Wovchena Sep 4, 2024
5773ae3
some corrections
pavel-esir Sep 4, 2024
41db203
temporary disable causal_lm win tests
pavel-esir Sep 4, 2024
6321b4d
fix PerfMetrics (#823)
Wovchena Sep 5, 2024
d1e5c13
Merge branch 'releases/2024/4' into add-openbmb/MiniCPM-V-2-sample
Wovchena Sep 5, 2024
6c0e656
[2024.4] update optimum intel commit to include mxfp4 conversion (#828)
eaidova Sep 5, 2024
ba0491a
[2024.4] use perf metrics for genai in llm bench (#830)
eaidova Sep 5, 2024
0c2f4c9
Tokenizerrs update
ilya-lavrenov Sep 5, 2024
7734510
Use tarballs
ilya-lavrenov Sep 5, 2024
18fc740
Use OpenVINO 2024.4 RC2
ilya-lavrenov Sep 6, 2024
1914a70
updated pybind version
mryzhov Sep 6, 2024
c8b3e3f
Merge branch 'releases/2024/4' into add-openbmb/MiniCPM-V-2-sample
Wovchena Sep 6, 2024
2d99414
improve utf8 handling for DecodedResults in pybind
pavel-esir Sep 6, 2024
52ea56d
alighn printing DecodedResults scores in py_generate_pipeline and llm…
pavel-esir Sep 6, 2024
fb8c7a4
Improve utf8 handling for DecodedResults in pybind (#837)
pavel-esir Sep 6, 2024
a75df54
replace invalid utf8 bytes when file is opened
pavel-esir Sep 6, 2024
355240e
print with 'cat pred.txt' for debug
pavel-esir Sep 6, 2024
1aa61f8
replace � -> ""
pavel-esir Sep 6, 2024
6d3d6b6
Apply suggestions from code review
pavel-esir Sep 6, 2024
95ceb92
Merge branch 'releases/2024/4' into add-openbmb/MiniCPM-V-2-sample
Wovchena Sep 6, 2024
8f87768
accept vector of tensors
Wovchena Sep 6, 2024
1a896d2
Fails at runtime with out of bounds
Wovchena Sep 10, 2024
adf0db3
temp
Wovchena Sep 10, 2024
3928162
Temo
Wovchena Sep 10, 2024
7920053
Temo
Wovchena Sep 10, 2024
8f51232
nan or seg fault
Wovchena Sep 19, 2024
daa8f6c
Still need to fix images
Wovchena Sep 19, 2024
348ec37
update README.md
Wovchena Sep 19, 2024
f9358d4
uncomment conversion
Wovchena Sep 19, 2024
e92b778
Comment out dumped tokens
Wovchena Sep 19, 2024
7de6d00
Merge branch 'master' into miniCPM-V-2_6
Wovchena Sep 20, 2024
f88b45e
Resolve conflicts
Wovchena Sep 20, 2024
3a0ad0b
Resolve conflicts
Wovchena Sep 20, 2024
dce2d79
Remove dump
Wovchena Sep 20, 2024
2450d96
Merge branch 'master' into miniCPM-V-2_6
Wovchena Sep 30, 2024
1bcecdf
chat template
Wovchena Oct 1, 2024
bdc4a65
temp
Wovchena Oct 2, 2024
5f30821
Tokenizers bug
Wovchena Oct 2, 2024
d43d06b
Merge branch 'master' into miniCPM-V-2_6
Wovchena Oct 2, 2024
bde9fb7
Read use_image_id
Wovchena Oct 2, 2024
6a44ba7
Merge branch 'miniCPM-V-2_6' into image-miniCPM-V-2_6
Wovchena Oct 2, 2024
c4d6fec
increase timeout
Wovchena Oct 2, 2024
a7e18a7
Delete comment
Wovchena Oct 2, 2024
5f9e5f7
prints
Wovchena Oct 3, 2024
63cdd24
Fix
Wovchena Oct 4, 2024
1e68d7a
Clean up
Wovchena Oct 4, 2024
17f69ec
workflow
Wovchena Oct 4, 2024
ce665b8
unpipe
Wovchena Oct 4, 2024
ea946d1
unzip
Wovchena Oct 4, 2024
64c93b8
dot
Wovchena Oct 4, 2024
450b6f7
cmd
Wovchena Oct 4, 2024
3356f2a
l
Wovchena Oct 4, 2024
23d087f
ls
Wovchena Oct 4, 2024
c796323
ls
Wovchena Oct 4, 2024
1e15496
env
Wovchena Oct 4, 2024
ca6b9ac
ubuntu
Wovchena Oct 4, 2024
416323d
torchvision
Wovchena Oct 4, 2024
de7d768
rename
Wovchena Oct 4, 2024
08d133c
docstrings
Wovchena Oct 4, 2024
447e745
add set_chat_template
Wovchena Oct 4, 2024
ebaadb7
Finilize chat_template
Wovchena Oct 4, 2024
bfad182
Simplify test
Wovchena Oct 4, 2024
be9e203
Fix multiple images
Wovchena Oct 4, 2024
3d0e5ba
Temp
Wovchena Oct 4, 2024
57bb044
Add macos
Wovchena Oct 6, 2024
5efc24b
and
Wovchena Oct 6, 2024
c473d2a
cmp
Wovchena Oct 6, 2024
c780a2e
cmp
Wovchena Oct 6, 2024
68d0134
matrix
Wovchena Oct 6, 2024
7f4cafb
runner
Wovchena Oct 6, 2024
b316c92
env
Wovchena Oct 6, 2024
797d8ed
shorten
Wovchena Oct 6, 2024
3d9886b
matrix
Wovchena Oct 6, 2024
5b00440
4 cores
Wovchena Oct 6, 2024
798f42d
./
Wovchena Oct 6, 2024
ee057ba
env
Wovchena Oct 6, 2024
e88ae49
ld
Wovchena Oct 6, 2024
382bcbc
ld
Wovchena Oct 6, 2024
30dc931
tbb
Wovchena Oct 6, 2024
4c61b12
and
Wovchena Oct 6, 2024
1ff0889
8
Wovchena Oct 6, 2024
3a800a1
LD_LIBRARY_PATH
Wovchena Oct 6, 2024
8a3a21b
TBB_DIR
Wovchena Oct 6, 2024
99911bc
move
Wovchena Oct 6, 2024
6ac98c5
echo
Wovchena Oct 6, 2024
53f3154
16
Wovchena Oct 6, 2024
55af01e
LD
Wovchena Oct 6, 2024
f064bdc
Remove ls
Wovchena Oct 6, 2024
21da18b
timeout-minutes: 1
Wovchena Oct 6, 2024
44f21c0
timeout-minutes: 2
Wovchena Oct 6, 2024
02c928e
setuvars
Wovchena Oct 6, 2024
f327e8c
fix jump
Wovchena Oct 6, 2024
0eb693e
tbb
Wovchena Oct 6, 2024
ddb1c29
DYLD_LIBRARY_PATH
Wovchena Oct 6, 2024
0ea2656
source ./ov/setupvars.sh
Wovchena Oct 6, 2024
c1d1f3b
4
Wovchena Oct 6, 2024
7a00aa5
timeout-minutes: 8
Wovchena Oct 7, 2024
33e1cef
timeout-minutes: 16
Wovchena Oct 7, 2024
12a7134
timeout-minutes: 32
Wovchena Oct 7, 2024
c166e5f
Update the model
Wovchena Oct 7, 2024
df9420d
Fix macos compilation
Wovchena Oct 7, 2024
871f334
spelling
Wovchena Oct 7, 2024
d3bb229
Clean up VLMPipeline
Wovchena Oct 7, 2024
a5a58c2
Remove error handling
Wovchena Oct 7, 2024
7441a18
Allow [NHWC] and [HWC]
Wovchena Oct 8, 2024
eaaa971
Move subtract_chat_tokenized_inputs' implementation to .cpp
Wovchena Oct 8, 2024
5eb7011
Revert test to drop mac
Wovchena Oct 8, 2024
ff27cf7
Fix layout description
Wovchena Oct 8, 2024
770f7ed
Merge branch 'master' into clean-up
Wovchena Oct 9, 2024
6609a08
Remove py constructor
Wovchena Oct 9, 2024
38 changes: 38 additions & 0 deletions .github/workflows/causal_lm_cpp.yml
@@ -681,6 +681,43 @@ jobs:
diff pred2.txt ref.txt
echo "Chat sample python" passed

visual_language_sample:
strategy:
fail-fast: false
# Windows fails to compile Jinja2Cpp.
matrix: {runs-on: [ubuntu-20.04-16-cores, macos-13]}
runs-on: ${{ matrix.runs-on }}
steps:
- uses: actions/checkout@v4
with: {submodules: recursive}
- uses: actions/setup-python@v4
with: {python-version: 3.12}
- run: mkdir ./ov/
- if: ${{ 'ubuntu-20.04-16-cores' == matrix.runs-on }}
run: >
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
&& sudo ./ov/install_dependencies/install_openvino_dependencies.sh
- if: ${{ 'macos-13' == matrix.runs-on }}
run: >
curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
&& brew install coreutils scons
- run: OpenVINO_DIR=./ov/runtime/cmake/ cmake -DCMAKE_BUILD_TYPE=Release -B ./build/ ./
- run: >
LD_LIBRARY_PATH=${{ github.workspace }}/ov/runtime/3rdparty/tbb/lib/:$LD_LIBRARY_PATH
cmake --build ./build/ --config Release --target visual_language_chat -j
- run: >
source ./ov/setupvars.sh
&& python -m pip install --upgrade-strategy eager ./thirdparty/openvino_tokenizers/[transformers] -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
- run: >
source ./ov/setupvars.sh
&& python ./samples/cpp/visual_language_chat/export_MiniCPM-V-2_6.py ./miniCPM-V-2_6/
- run: wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg
- run: >
source ./ov/setupvars.sh
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./miniCPM-V-2_6/ cat.jpg
<<< $'What is on the image?\nWhat is special on the image?'
timeout-minutes: 110

cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
defaults:
@@ -823,6 +860,7 @@ jobs:
cpp-beam_search_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
cpp-beam_search_causal_lm-notus-7b-v1, cpp-speculative_decoding_lm-ubuntu, cpp-prompt_lookup_decoding_lm-ubuntu,
cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
visual_language_sample,
cpp-continuous-batching-windows, cpp-continuous-batching-macos]
if: ${{ always() }}
runs-on: ubuntu-latest
2 changes: 1 addition & 1 deletion .github/workflows/mac.yml
@@ -386,7 +386,7 @@ jobs:
if: ${{ 'Release' == matrix.build-type }} # Python bindings can be built in Release only
run: |
source ${OV_INSTALL_DIR}/setupvars.sh
timeout 25s ${OV_INSTALL_DIR}/samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./TinyLlama-1.1B-Chat-v1.0/ 0
timeout 26s ${OV_INSTALL_DIR}/samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./TinyLlama-1.1B-Chat-v1.0/ 0

Overall_Status:
name: ci/gha_overall_status_macos
4 changes: 3 additions & 1 deletion samples/CMakeLists.txt
@@ -10,6 +10,7 @@ add_subdirectory(cpp/greedy_causal_lm)
add_subdirectory(cpp/lora_greedy_causal_lm)
add_subdirectory(cpp/multinomial_causal_lm)
add_subdirectory(cpp/prompt_lookup_decoding_lm)
add_subdirectory(cpp/visual_language_chat)
add_subdirectory(cpp/speculative_decoding_lm)
add_subdirectory(cpp/benchmark_genai)
add_subdirectory(cpp/whisper_speech_recognition)
@@ -24,7 +25,8 @@ install(DIRECTORY
cpp/greedy_causal_lm
cpp/multinomial_causal_lm
# Don't install prompt_lookup_decoding_lm and speculative_decoding_lm because they don't use the openvino_genai library and aren't verified yet.
# Don't install continuous_batching_accuracy and continuous_batching_benchmark because they depend on json.
# Don't install continuous_batching_accuracy and continuous_batching_benchmark because CB isn't ready.
cpp/visual_language_chat
cpp/whisper_speech_recognition
cpp/stable_diffusion
cpp/lora_greedy_causal_lm
28 changes: 28 additions & 0 deletions samples/cpp/visual_language_chat/CMakeLists.txt
@@ -0,0 +1,28 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

find_package(OpenVINOGenAI REQUIRED
PATHS
"${CMAKE_BINARY_DIR}" # Reuse the package from the build.
${OpenVINO_DIR} # GenAI may be installed alongside OpenVINO.
NO_CMAKE_FIND_ROOT_PATH
)

file(DOWNLOAD
https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image.h
${CMAKE_BINARY_DIR}/stb_image.h
EXPECTED_HASH MD5=27932e6fb3a2f26aee2fc33f2cb4e696)

add_executable(visual_language_chat visual_language_chat.cpp load_image.cpp)
target_include_directories(visual_language_chat PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
target_link_libraries(visual_language_chat PRIVATE openvino::genai)

set_target_properties(visual_language_chat PROPERTIES
COMPILE_PDB_NAME visual_language_chat
# Ensure out of box LC_RPATH on macOS with SIP
INSTALL_RPATH_USE_LINK_PATH ON)

install(TARGETS visual_language_chat
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)
37 changes: 37 additions & 0 deletions samples/cpp/visual_language_chat/README.md
@@ -0,0 +1,37 @@
# C++ visual language chat

This example showcases inference of visual language models (VLMs) such as [`openbmb/MiniCPM-V-2_6`](https://huggingface.co/openbmb/MiniCPM-V-2_6). The application intentionally has few configuration options, to encourage the reader to explore and modify the source code; for example, try changing the inference device to GPU. The sample features `ov::genai::VLMPipeline` and runs the simplest deterministic greedy sampling algorithm. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/latest/notebooks/minicpm-v-multimodal-chatbot) which provides an example of a visual-language assistant.

## Download and convert the model and tokenizers

The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.

It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported.

```sh
pip install --upgrade-strategy eager -r ../../requirements.txt
export_MiniCPM-V-2_6.py miniCPM-V-2_6
```

## Run

[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image.

`visual_language_chat miniCPM-V-2_6 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg`

Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model `llava-hf/llava-v1.6-mistral-7b-hf` can benefit from being run on a dGPU. Modify the source code to change the device for inference to the `GPU`.

See [SUPPORTED_MODELS.md](../../../src/docs/SUPPORTED_MODELS.md#visual-language-models) for the list of supported models.
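The chat flow the sample implements boils down to a handful of `VLMPipeline` calls. A minimal sketch, with no error handling (the header path, the `utils::load_image` helper from `load_image.cpp`, and the exact `generate` overloads are assumptions based on this PR, not a verbatim copy of the sample):

```cpp
#include <iostream>
#include <string>

#include "load_image.hpp"                   // sample helper built on stb_image.h (assumed name)
#include "openvino/genai/vlm_pipeline.hpp"  // header name assumed from this PR

int main(int argc, char* argv[]) {
    // Usage: visual_language_chat <model_dir> <image>
    ov::genai::VLMPipeline pipe(argv[1], "CPU");  // change "CPU" to "GPU" for a dGPU
    ov::Tensor image = utils::load_image(argv[2]);

    pipe.start_chat();
    // The image is attached to the first turn; later turns reuse the chat history.
    std::cout << pipe.generate("What is on the image?", ov::genai::image(image)) << '\n';
    std::cout << pipe.generate("What is special on the image?") << '\n';
    pipe.finish_chat();
}
```

Building and running this requires an exported MiniCPM-V-2_6 model directory and the `openvino::genai` library, as set up in the conversion and CMake steps above.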

### Troubleshooting

#### Unicode characters encoding error on Windows

Example error:
```
UnicodeEncodeError: 'charmap' codec can't encode character '\u25aa' in position 0: character maps to <undefined>
```

If you encounter this error while the sample is printing output to the Windows console, it is likely because the default Windows encoding does not support certain Unicode characters. To resolve this:
1. Enable Unicode characters for the Windows console: open `Region` settings from `Control Panel`, then `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`, and reboot.
2. Enable UTF-8 mode by setting the environment variable `PYTHONIOENCODING="utf8"`.