Commit 31cdcbf: "fix comments"

sbalandi committed Oct 10, 2024 (1 parent: 40b4924)

Showing 6 changed files with 151 additions and 275 deletions.
samples/cpp/visual_language_chat/CMakeLists.txt (6 additions, 1 deletion)
@@ -8,14 +8,19 @@ find_package(OpenVINOGenAI REQUIRED
     NO_CMAKE_FIND_ROOT_PATH
 )
 
+FetchContent_Declare(cxxopts
+    URL https://github.com/jarro2783/cxxopts/archive/refs/tags/v3.1.1.tar.gz
+    URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
+FetchContent_MakeAvailable(cxxopts)
+
 file(DOWNLOAD
     https://raw.githubusercontent.com/nothings/stb/f75e8d1cad7d90d72ef7a4661f1b994ef78b4e31/stb_image.h
     ${CMAKE_BINARY_DIR}/stb_image.h
     EXPECTED_HASH MD5=27932e6fb3a2f26aee2fc33f2cb4e696)
 
 add_executable(visual_language_chat visual_language_chat.cpp load_image.cpp)
 target_include_directories(visual_language_chat PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
-target_link_libraries(visual_language_chat PRIVATE openvino::genai)
+target_link_libraries(visual_language_chat PRIVATE openvino::genai cxxopts::cxxopts)
 
 set_target_properties(visual_language_chat PROPERTIES
     COMPILE_PDB_NAME visual_language_chat
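A note on the file(DOWNLOAD ...) step above: stb_image.h is a single-header image decoder, which load_image.cpp presumably builds on. A minimal decode sketch, assuming a packed-RGB helper (the function name and error handling are illustrative, not taken from the sample):

// Hypothetical helper sketch; the sample's real code lives in load_image.cpp.
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

#include <stdexcept>
#include <string>
#include <vector>

std::vector<unsigned char> decode_rgb(const std::string& path, int& width, int& height) {
    int channels = 0;
    // Request 3 channels so the result is packed 8-bit RGB regardless of the source format.
    unsigned char* data = stbi_load(path.c_str(), &width, &height, &channels, 3);
    if (!data) {
        throw std::runtime_error("Failed to decode image: " + path);
    }
    std::vector<unsigned char> rgb(data, data + static_cast<size_t>(width) * height * 3);
    stbi_image_free(data);  // buffers returned by stbi_load must be freed by the caller
    return rgb;
}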
samples/cpp/visual_language_chat/visual_language_chat.cpp (74 additions, 18 deletions)
@@ -5,46 +5,102 @@
 #include <openvino/genai/vlm_pipeline.hpp>
 #include <openvino/runtime/intel_gpu/properties.hpp>
 
+#include <iostream>
+#include <filesystem>
+namespace fs = std::filesystem;
+
+#include <cxxopts.hpp>
+
 bool print_subword(std::string&& subword) {
     return !(std::cout << subword << std::flush);
 }
 
 int main(int argc, char* argv[]) try {
-    if (3 != argc) {
-        throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
+
+    cxxopts::Options options("visual_language_chat", "Visual language chat sample");
+
+    options.add_options()
+    ("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value(""))
+    ("i,inputs", "Path to image or to directory with images", cxxopts::value<std::string>()->default_value(""))
+    ("d,device", "Target device to run the model", cxxopts::value<std::string>()->default_value("CPU"))
+    ("s,sampling", "Sampling method: [greedy|multinomial|beam_search]. Optional, 'greedy' by default.", cxxopts::value<std::string>()->default_value("greedy"))
+    ("h,help", "Print usage");
+
+    cxxopts::ParseResult result;
+    try {
+        result = options.parse(argc, argv);
+    } catch (const cxxopts::exceptions::exception& e) {
+        std::cout << e.what() << "\n\n";
+        std::cout << options.help() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    if (result.count("help")) {
+        std::cout << options.help() << std::endl;
+        return EXIT_SUCCESS;
     }
-    ov::Tensor image = utils::load_image(argv[2]);
-    std::string device = "CPU"; // GPU can be used as well
+
+    const std::string model_path = result["model"].as<std::string>();
+    const std::string device = result["device"].as<std::string>();
+    const std::string input_path = result["inputs"].as<std::string>();
+    const std::string sampling_method = result["sampling"].as<std::string>();
+
+    ov::AnyMap properties;
+    if (sampling_method == "greedy") {
+        properties.insert(ov::genai::generation_config(ov::genai::greedy()));
+        properties.insert(ov::genai::streamer(print_subword));
+    } else if (sampling_method == "beam_search") {
+        properties.insert(ov::genai::generation_config(ov::genai::beam_search()));
+    } else if (sampling_method == "multinomial") {
+        properties.insert(ov::genai::generation_config(ov::genai::multinomial()));
+        properties.insert(ov::genai::streamer(print_subword));
+    } else {
+        throw std::runtime_error("Sampling method should be one of [greedy|multinomial|beam_search]; if omitted, greedy is used.");
+    }
+
+    std::vector<ov::Tensor> images;
+    if (!input_path.empty() && fs::exists(input_path)) {
+        if (fs::is_directory(input_path)) {
+            for (const auto& dir_entry : fs::directory_iterator(input_path)) {
+                ov::Tensor image = utils::load_image(dir_entry.path());
+                images.push_back(std::move(image));
+            }
+        } else if (fs::is_regular_file(input_path)) {
+            ov::Tensor image = utils::load_image(input_path);
+            images.push_back(std::move(image));
+        }
+    }
+
+    if (images.empty())
+        throw std::runtime_error("No images found at path " + input_path);
+    else
+        properties.insert(images.size() == 1 ? ov::genai::image(images.at(0)) : ov::genai::images(images));
+
 ov::AnyMap enable_compile_cache;
     if ("GPU" == device) {
         // Cache compiled models on disk for GPU to save time on the
         // next run. It's not beneficial for CPU.
         enable_compile_cache.insert({ov::cache_dir("vlm_cache")});
     }
-    ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache);
+    ov::genai::VLMPipeline pipe(model_path, device, enable_compile_cache);
     std::string prompt;
 
     pipe.start_chat();
     std::cout << "question:\n";
     if (!std::getline(std::cin, prompt)) {
         throw std::runtime_error("std::cin failed");
     }
-    pipe.generate(
-        prompt,
-        // ov::genai::image(std::move(image)),
-        ov::genai::generation_config(ov::genai::beam_search()),
-        // ov::genai::generation_config(ov::genai::greedy()),
-        // ov::genai::generation_config(ov::genai::multinomial()),
-        ov::genai::streamer(print_subword)
-    );
+    auto results = pipe.generate(prompt, properties);
+    if (sampling_method == "beam_search") {
+        std::cout << results.texts.at(0) << std::endl;
+    }
     std::cout << "\n----------\n"
         "question:\n";
     while (std::getline(std::cin, prompt)) {
-        pipe.generate(prompt,
-            ov::genai::generation_config(ov::genai::beam_search()),
-            // ov::genai::generation_config(ov::genai::greedy()),
-            // ov::genai::generation_config(ov::genai::multinomial()),
-            ov::genai::streamer(print_subword));
+        results = pipe.generate(prompt, properties);
+        if (sampling_method == "beam_search") {
+            std::cout << results.texts.at(0) << std::endl;
+        }
         std::cout << "\n----------\n"
             "question:\n";
     }
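Two remarks on the rewritten sample. First, usage changes from the old positional form (visual_language_chat <MODEL_DIR> <IMAGE_FILE>) to named options, e.g. visual_language_chat -m <MODEL_DIR> -i <IMAGE_OR_DIR> -d GPU -s multinomial. Second, fs::directory_iterator yields entries in an unspecified order, so multi-image prompts may be assembled differently across runs and platforms; a sketch of a deterministic variant (the sorting is a suggestion, not part of the commit):

// Sketch: collect image paths in a stable order before loading them.
#include <algorithm>
#include <filesystem>
#include <vector>

namespace fs = std::filesystem;

std::vector<fs::path> list_files_sorted(const fs::path& dir) {
    std::vector<fs::path> paths;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (entry.is_regular_file()) {
            paths.push_back(entry.path());
        }
    }
    std::sort(paths.begin(), paths.end());  // directory_iterator order is unspecified
    return paths;
}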
src/cpp/src/sampler.cpp (6 additions, 6 deletions)
@@ -577,13 +577,13 @@ std::vector<int64_t> Sampler::_try_finish_generation(SequenceGroup::Ptr & sequence_group) {
 }
 
 
-std::vector<int32_t> Sampler::get_beam_idxs(uint64_t request_id) {
-    std::vector<int32_t> beams;
-    if (m_beam_search_info.find(request_id) != m_beam_search_info.end()) {
-        GroupBeamSearcher beam_searcher = m_beam_search_info.at(request_id);
-        std::vector<int32_t> beams = beam_searcher.get_beam_idxs();
+std::vector<int32_t> Sampler::get_beam_idxs(SequenceGroup::CPtr request) {
+    uint64_t request_id = request->get_request_id();
+    auto beam_searcher = m_beam_search_info.find(request_id);
+    if (beam_searcher == m_beam_search_info.end()) {
+        return std::vector<int32_t>(request->num_running_seqs(), 0);
     }
-    return beams;
+    return beam_searcher->second.get_beam_idxs();
 }
 
 
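The rewritten get_beam_idxs reuses the iterator returned by find() instead of looking the key up twice, and falls back to beam index 0 for each running sequence when no beam-search state exists for the request. A self-contained sketch of the same idiom (the map and names are illustrative, not the Sampler internals):

// Illustrative stand-in for m_beam_search_info: request id -> beam indices.
#include <cstdint>
#include <map>
#include <vector>

std::map<uint64_t, std::vector<int32_t>> beam_info;

std::vector<int32_t> beam_idxs_or_default(uint64_t request_id, size_t num_running_seqs) {
    auto it = beam_info.find(request_id);  // single lookup; keep the iterator
    if (it == beam_info.end()) {
        // No beam-search state for this request: every running sequence maps to beam 0.
        return std::vector<int32_t>(num_running_seqs, 0);
    }
    return it->second;
}

Call sites accordingly pass the sequence group itself rather than its raw id, matching the header change below.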
src/cpp/src/sampler.hpp (1 addition, 1 deletion)
@@ -61,7 +61,7 @@ class Sampler {
     SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits);
     void set_seed(size_t seed) { rng_engine.seed(seed); }
     void clear_beam_search_info(uint64_t request_id);
-    std::vector<int32_t> get_beam_idxs(uint64_t request_id);
+    std::vector<int32_t> get_beam_idxs(SequenceGroup::CPtr);
 };
 
 class Sampler::GroupBeamSearcher {