Commit 31cdcbf: "fix comments"

sbalandi committed Oct 10, 2024 (1 parent: 40b4924)

Showing 6 changed files with 151 additions and 275 deletions.
samples/cpp/visual_language_chat/CMakeLists.txt (6 additions, 1 deletion)
@@ -8,14 +8,19 @@ find_package(OpenVINOGenAI REQUIRED
     NO_CMAKE_FIND_ROOT_PATH
 )
 
+FetchContent_Declare(cxxopts
+    URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz
+    URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
+FetchContent_MakeAvailable(cxxopts)
+
 file(DOWNLOAD
     https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image.h
     ${CMAKE_BINARY_DIR}/stb_image.h
     EXPECTED_HASH MD5=27932e6fb3a2f26aee2fc33f2cb4e696)
 
 add_executable(visual_language_chat visual_language_chat.cpp load_image.cpp)
 target_include_directories(visual_language_chat PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
-target_link_libraries(visual_language_chat PRIVATE openvino::genai)
+target_link_libraries(visual_language_chat PRIVATE openvino::genai cxxopts::cxxopts)
 
 set_target_properties(visual_language_chat PROPERTIES
     COMPILE_PDB_NAME visual_language_chat
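A note on the file(DOWNLOAD ...) step above: stb_image.h is a single-header image decoder, which load_image.cpp presumably builds on. A minimal decode sketch, assuming a packed-RGB helper (the function name and error handling are illustrative, not taken from the sample):

// Hypothetical helper sketch; the sample's real code lives in load_image.cpp.
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

#include <stdexcept>
#include <string>
#include <vector>

std::vector<unsigned char> decode_rgb(const std::string& path, int& width, int& height) {
    int channels = 0;
    // Request 3 channels so the result is packed 8-bit RGB regardless of the source format.
    unsigned char* data = stbi_load(path.c_str(), &width, &height, &channels, 3);
    if (!data) {
        throw std::runtime_error("Failed to decode image: " + path);
    }
    std::vector<unsigned char> rgb(data, data + static_cast<size_t>(width) * height * 3);
    stbi_image_free(data);  // buffers returned by stbi_load must be freed by the caller
    return rgb;
}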
samples/cpp/visual_language_chat/visual_language_chat.cpp (74 additions, 18 deletions)
@@ -5,46 +5,102 @@
 #include <openvino/genai/vlm_pipeline.hpp>
 #include <openvino/runtime/intel_gpu/properties.hpp>
 
+#include <iostream>
+#include <filesystem>
+namespace fs = std::filesystem;
+
+#include <cxxopts.hpp>
+
 bool print_subword(std::string&& subword) {
     return !(std::cout << subword << std::flush);
 }
 
 int main(int argc, char* argv[]) try {
-    if (3 != argc) {
-        throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
+
+    cxxopts::Options options("visual_language_chat", "Visual language chat sample");
+
+    options.add_options()
+    ("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value(""))
+    ("i,inputs", "Path to image or to directory with images", cxxopts::value<std::string>()->default_value(""))
+    ("d,device", "Target device to run the model", cxxopts::value<std::string>()->default_value("CPU"))
+    ("s,sampling", "Sampling method: [greedy|multinomial|beam_search]. Optional, 'greedy' by default.", cxxopts::value<std::string>()->default_value("greedy"))
+    ("h,help", "Print usage");
+
+    cxxopts::ParseResult result;
+    try {
+        result = options.parse(argc, argv);
+    } catch (const cxxopts::exceptions::exception& e) {
+        std::cout << e.what() << "\n\n";
+        std::cout << options.help() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    if (result.count("help")) {
+        std::cout << options.help() << std::endl;
+        return EXIT_SUCCESS;
     }
-    ov::Tensor image = utils::load_image(argv[2]);
-    std::string device = "CPU"; // GPU can be used as well
+
+    const std::string model_path = result["model"].as<std::string>();
+    const std::string device = result["device"].as<std::string>();
+    const std::string input_path = result["inputs"].as<std::string>();
+    const std::string sampling_method = result["sampling"].as<std::string>();
+
+    ov::AnyMap properties;
+    if (sampling_method == "greedy") {
+        properties.insert(ov::genai::generation_config(ov::genai::greedy()));
+        properties.insert(ov::genai::streamer(print_subword));
+    } else if (sampling_method == "beam_search") {
+        properties.insert(ov::genai::generation_config(ov::genai::beam_search()));
+    } else if (sampling_method == "multinomial") {
+        properties.insert(ov::genai::generation_config(ov::genai::multinomial()));
+        properties.insert(ov::genai::streamer(print_subword));
+    } else {
+        throw std::runtime_error("Sampling method should be one of [greedy|multinomial|beam_search]; if omitted, greedy is used.");
+    }
+
+    std::vector<ov::Tensor> images;
+    if (!input_path.empty() && fs::exists(input_path)) {
+        if (fs::is_directory(input_path)) {
+            for (const auto& dir_entry : fs::directory_iterator(input_path)) {
+                ov::Tensor image = utils::load_image(dir_entry.path());
+                images.push_back(std::move(image));
+            }
+        } else if (fs::is_regular_file(input_path)) {
+            ov::Tensor image = utils::load_image(input_path);
+            images.push_back(std::move(image));
+        }
+    }
+
+    if (images.empty())
+        throw std::runtime_error("No images found at path " + input_path);
+    else
+        properties.insert(images.size() == 1 ? ov::genai::image(images.at(0)) : ov::genai::images(images));
+
 ov::AnyMap enable_compile_cache;
     if ("GPU" == device) {
         // Cache compiled models on disk for GPU to save time on the
         // next run. It's not beneficial for CPU.
         enable_compile_cache.insert({ov::cache_dir("vlm_cache")});
     }
-    ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache);
+    ov::genai::VLMPipeline pipe(model_path, device, enable_compile_cache);
     std::string prompt;
 
     pipe.start_chat();
     std::cout << "question:\n";
     if (!std::getline(std::cin, prompt)) {
         throw std::runtime_error("std::cin failed");
     }
-    pipe.generate(
-        prompt,
-        // ov::genai::image(std::move(image)),
-        ov::genai::generation_config(ov::genai::beam_search()),
-        // ov::genai::generation_config(ov::genai::greedy()),
-        // ov::genai::generation_config(ov::genai::multinomial()),
-        ov::genai::streamer(print_subword)
-    );
+    auto results = pipe.generate(prompt, properties);
+    if (sampling_method == "beam_search") {
+        std::cout << results.texts.at(0) << std::endl;
+    }
     std::cout << "\n----------\n"
         "question:\n";
     while (std::getline(std::cin, prompt)) {
-        pipe.generate(prompt,
-            ov::genai::generation_config(ov::genai::beam_search()),
-            // ov::genai::generation_config(ov::genai::greedy()),
-            // ov::genai::generation_config(ov::genai::multinomial()),
-            ov::genai::streamer(print_subword));
+        results = pipe.generate(prompt, properties);
+        if (sampling_method == "beam_search") {
+            std::cout << results.texts.at(0) << std::endl;
+        }
         std::cout << "\n----------\n"
             "question:\n";
     }
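Two remarks on the rewritten sample. First, usage changes from the old positional form (visual_language_chat <MODEL_DIR> <IMAGE_FILE>) to named options, e.g. visual_language_chat -m <MODEL_DIR> -i <IMAGE_OR_DIR> -d GPU -s multinomial. Second, fs::directory_iterator yields entries in an unspecified order, so multi-image prompts may be assembled differently across runs and platforms; a sketch of a deterministic variant (the sorting is a suggestion, not part of the commit):

// Sketch: collect image paths in a stable order before loading them.
#include <algorithm>
#include <filesystem>
#include <vector>

namespace fs = std::filesystem;

std::vector<fs::path> list_files_sorted(const fs::path& dir) {
    std::vector<fs::path> paths;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (entry.is_regular_file()) {
            paths.push_back(entry.path());
        }
    }
    std::sort(paths.begin(), paths.end());  // directory_iterator order is unspecified
    return paths;
}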
src/cpp/src/sampler.cpp (6 additions, 6 deletions)
@@ -577,13 +577,13 @@ std::vector<int64_t> Sampler::_try_finish_generation(SequenceGroup::Ptr & sequence_group) {
 }
 
 
-std::vector<int32_t> Sampler::get_beam_idxs(uint64_t request_id) {
-    std::vector<int32_t> beams;
-    if (m_beam_search_info.find(request_id) != m_beam_search_info.end()) {
-        GroupBeamSearcher beam_searcher = m_beam_search_info.at(request_id);
-        std::vector<int32_t> beams = beam_searcher.get_beam_idxs();
+std::vector<int32_t> Sampler::get_beam_idxs(SequenceGroup::CPtr request) {
+    uint64_t request_id = request->get_request_id();
+    auto beam_searcher = m_beam_search_info.find(request_id);
+    if (beam_searcher == m_beam_search_info.end()) {
+        return std::vector<int32_t>(request->num_running_seqs(), 0);
     }
-    return beams;
+    return beam_searcher->second.get_beam_idxs();
 }
 
 
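The rewritten get_beam_idxs reuses the iterator returned by find() instead of looking the key up twice, and falls back to beam index 0 for each running sequence when no beam-search state exists for the request. A self-contained sketch of the same idiom (the map and names are illustrative, not the Sampler internals):

// Illustrative stand-in for m_beam_search_info: request id -> beam indices.
#include <cstdint>
#include <map>
#include <vector>

std::map<uint64_t, std::vector<int32_t>> beam_info;

std::vector<int32_t> beam_idxs_or_default(uint64_t request_id, size_t num_running_seqs) {
    auto it = beam_info.find(request_id);  // single lookup; keep the iterator
    if (it == beam_info.end()) {
        // No beam-search state for this request: every running sequence maps to beam 0.
        return std::vector<int32_t>(num_running_seqs, 0);
    }
    return it->second;
}

Call sites accordingly pass the sequence group itself rather than its raw id, matching the header change below.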
src/cpp/src/sampler.hpp (1 addition, 1 deletion)
@@ -61,7 +61,7 @@ class Sampler {
     SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits);
     void set_seed(size_t seed) { rng_engine.seed(seed); }
     void clear_beam_search_info(uint64_t request_id);
-    std::vector<int32_t> get_beam_idxs(uint64_t request_id);
+    std::vector<int32_t> get_beam_idxs(SequenceGroup::CPtr);
 };
 
 class Sampler::GroupBeamSearcher {