Commit

Fixed merge

AsyaPronina committed Dec 24, 2024
1 parent ef82087 commit c52bd12
Showing 3 changed files with 59 additions and 17 deletions.
12 changes: 6 additions & 6 deletions src/cpp/src/llm_pipeline.cpp
@@ -678,13 +678,13 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase {

 /*
  * NPU reads some properties from the config file, but when LLMPipeline is initialized
- * from the model_str and weights_tensor, there are not files.
+ * from the model_str and weights_tensor, there are no files.
  * In the latter case ModelDesc is stored in properties.
  * This function pops ModelDescr from the properties and returns a pair of updated properties and ModelDescr.
  */
-std::pair<ov::AnyMap, ov::genai::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
+std::pair<ov::AnyMap, ov::genai::static_llm::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
     ov::AnyMap main_properties = properties;
-    ov::genai::ModelConfigDesc model_descr;
+    ov::genai::static_llm::ModelConfigDesc model_descr;

     auto pop_property = [](ov::AnyMap& orig_propertis, const std::string& key, auto& value) {
         if (orig_propertis.find(key) != orig_propertis.end()) {
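For readers skimming the diff: split_model_descr separates the ModelDesc entries from the plugin properties so the remainder can be forwarded to compilation untouched. Below is a minimal standalone sketch of that pop-and-erase pattern, with std::any standing in for ov::Any and illustrative key names; it is not the repository's code.

```cpp
// Sketch of the pop-and-erase pattern used by split_model_descr.
// std::any stands in for ov::Any so the snippet compiles on its own.
#include <any>
#include <iostream>
#include <map>
#include <string>

using AnyMap = std::map<std::string, std::any>;

// Pops `key` from `props` into `value` if present, mirroring the lambda above.
template <typename T>
void pop_property(AnyMap& props, const std::string& key, T& value) {
    auto it = props.find(key);
    if (it != props.end()) {
        value = std::any_cast<T>(it->second);
        props.erase(it);  // the key must not leak into the plugin properties
    }
}

int main() {
    AnyMap props{{"model_type", std::string("llama")},
                 {"CACHE_DIR", std::string("/tmp/ov_cache")}};
    std::string model_type;
    pop_property(props, "model_type", model_type);
    std::cout << model_type << ", remaining properties: " << props.size() << "\n";
}
```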
@@ -721,7 +721,7 @@ ov::genai::LLMPipeline::LLMPipeline(
     if (properties.find(ov::genai::scheduler_config.name()) != properties.end()) {
         auto [plugin_config, scheduler_config] = utils::split_scheduler_config(properties);
         m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, tokenizer, scheduler_config, device, plugin_config);
-    } else if ("NPU" == device) {
+    } else if (device == "NPU") {
         m_pimpl = static_llm::LLMPipelineFactory::create(models_path, tokenizer, device, properties);
     } else {
         m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, tokenizer, device, properties);
@@ -740,7 +740,7 @@ ov::genai::LLMPipeline::LLMPipeline(
     if (config.find(ov::genai::scheduler_config.name()) != config.end()) {
         auto [plugin_config, scheduler_config] = utils::split_scheduler_config(config);
         m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, scheduler_config, device, plugin_config);
-    } else if ("NPU" == device) {
+    } else if (device == "NPU") {
         m_pimpl = static_llm::LLMPipelineFactory::create(models_path, device, config);
     } else {
         m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, device, config);
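From the caller's perspective, the three m_pimpl branches above are selected purely by constructor arguments. A sketch of how each branch is reached, assuming the public header as referenced in this diff; the model path is illustrative:

```cpp
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Plain device string -> StatefulLLMPipeline.
    ov::genai::LLMPipeline cpu_pipe("models/llama", "CPU");

    // "NPU" as the device string -> static_llm::LLMPipelineFactory::create.
    ov::genai::LLMPipeline npu_pipe("models/llama", "NPU");

    // Presence of the scheduler_config property -> ContinuousBatchingAdapter.
    ov::genai::SchedulerConfig scheduler;
    ov::genai::LLMPipeline cb_pipe("models/llama", "CPU",
                                   ov::genai::scheduler_config(scheduler));
}
```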
@@ -778,7 +778,7 @@ ov::genai::LLMPipeline::LLMPipeline(
     // This will convert from AnyMap to ModelDesc.
     auto [properties, model_descr] = split_model_descr(plugin_config);

-    m_pimpl = std::make_unique<StaticLLMPipeline>(
+    m_pimpl = static_llm::LLMPipelineFactory::create(
         utils::singleton_core().read_model(model_str, weights_tensor),
         model_descr,
         tokenizer,
56 changes: 46 additions & 10 deletions src/cpp/src/llm_pipeline_static.cpp
@@ -396,12 +396,12 @@ KVAxesPosition get_kv_axes(const std::string& model_type) {
     return axes;
 }

-ov::genai::ModelConfigDesc get_modeldesc_from_json(const std::filesystem::path& filepath) {
+ov::genai::static_llm::ModelConfigDesc get_modeldesc_from_json(const std::filesystem::path& filepath) {
     std::ifstream file(filepath);
     OPENVINO_ASSERT(file.is_open(), "Could not open file: " + filepath.string());
     nlohmann::json config_data = nlohmann::json::parse(file);

-    ov::genai::ModelConfigDesc desc;
+    ov::genai::static_llm::ModelConfigDesc desc;
     desc.type = config_data["model_type"].get<std::string>();
     // NB: In case the _name_or_path field isn't present in config.json
     if (config_data.contains("_name_or_path")) {
@@ -412,7 +412,7 @@ ov::genai::ModelConfigDesc get_modeldesc_from_json(const std::filesystem::path&
     return desc;
 }
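For reference, the only config.json fields this function consumes are model_type and the optional _name_or_path. A self-contained sketch with illustrative values:

```cpp
// Sketch: the subset of config.json that get_modeldesc_from_json reads.
#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

int main() {
    auto config_data = nlohmann::json::parse(R"({
        "model_type": "llama",
        "_name_or_path": "meta-llama/Llama-2-7b-chat-hf"
    })");

    std::string type = config_data["model_type"].get<std::string>();
    std::string name_or_path;
    if (config_data.contains("_name_or_path")) {  // the field is optional
        name_or_path = config_data["_name_or_path"].get<std::string>();
    }
    std::cout << type << " @ " << name_or_path << "\n";
}
```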

-std::string model_desc_to_string(const ov::genai::ModelConfigDesc& model_desc) {
+std::string model_desc_to_string(const ov::genai::static_llm::ModelConfigDesc& model_desc) {
     std::map<std::string, std::string> model_desc_map;
     model_desc_map["type"] = model_desc.type;
     model_desc_map["name_or_path"] = model_desc.name_or_path;
@@ -672,7 +672,7 @@ StatefulLLMPipeline::StatefulLLMPipeline(
     utils::from_config_json_if_exists(models_path)) {

     auto model = genai::utils::singleton_core().read_model((models_path / "openvino_model.xml").string());
-    ov::genai::ModelConfigDesc model_desc = get_modeldesc_from_json(models_path / "config.json");
+    ModelConfigDesc model_desc = get_modeldesc_from_json(models_path / "config.json");
     ov::AnyMap properties = config;

     auto compiled = setupAndCompileModel(model, model_desc, properties);
@@ -698,7 +698,7 @@ StatefulLLMPipeline::StatefulLLMPipeline(
     OPENVINO_ASSERT(!use_blobs, "blobs cannot be used with model string and weights tensor");

     ov::AnyMap properties_copy = properties;
-    auto compiled = setupAndCompileModel(model, model_desc, properties);
+    auto compiled = setupAndCompileModel(model, model_desc, properties_copy);
     m_request = compiled->create_infer_request();
 }

@@ -721,10 +721,7 @@ std::shared_ptr<ov::CompiledModel> StatefulLLMPipeline::setupAndCompileModel(

rename_key(pipeline_config, "PREFILL_CONFIG", "NPUW_LLM_PREFILL_CONFIG");
rename_key(pipeline_config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG");

// FIXME: Support CACHE_DIR in future
drop_cache_dir(pipeline_config);


return std::make_shared<ov::CompiledModel>(genai::utils::singleton_core().compile_model(model, "NPU", pipeline_config));
}

@@ -882,6 +879,19 @@ EncodedResults StatefulLLMPipeline::generate(

         // TODO: How to check that KV-Cache is full?
     }
+
+    if (streamer_ptr) {
+        streamer_ptr->end();
+    }
+
+    auto stop_time = std::chrono::steady_clock::now();
+    // If generate() is called without tokenization, this stat will not be reported.
+    auto& metrics = results.perf_metrics;
+    metrics.num_input_tokens = batch_size * input_ids.get_shape().at(1);
+    metrics.load_time = this->m_load_time_ms;
+    metrics.raw_metrics.generate_durations.emplace_back(PerfMetrics::get_microsec(stop_time - start_time));
+    metrics.evaluate_statistics(start_time);
+    return results;
 }
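The metrics filled in above surface to callers through perf_metrics on the results object. A sketch of reading them back, assuming the public API as referenced elsewhere in this diff; the model path is illustrative:

```cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main() {
    ov::genai::LLMPipeline pipe("models/llama", "NPU");
    auto results = pipe.generate("What is OpenVINO?");

    const auto& m = results.perf_metrics;
    std::cout << "load time, ms: " << m.load_time << "\n"
              << "input tokens:  " << m.num_input_tokens << "\n";
}
```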

void StatefulLLMPipeline::start_chat(const std::string& system_message) {
@@ -918,7 +928,7 @@ StatelessLLMPipeline::StatelessLLMPipeline(
     */
     const auto use_blobs = pop_or_default(properties, "USE_BLOBS", false);
     if (!use_blobs) {
-        ov::genai::ModelConfigDesc model_desc = get_modeldesc_from_json(models_path / "config.json");
+        ModelConfigDesc model_desc = get_modeldesc_from_json(models_path / "config.json");
         auto model = genai::utils::singleton_core().read_model((models_path / "openvino_model.xml").string());
         setupAndCompileModels(model, device, model_desc, properties);
     } else {
@@ -1397,6 +1407,32 @@ LLMPipelineFactory::create(const std::filesystem::path& models_path,
                            const ov::AnyMap& config) {
     return create(models_path, Tokenizer(models_path), device, config);
 }
+
+std::unique_ptr<LLMPipelineImplBase> LLMPipelineFactory::create(const std::shared_ptr<ov::Model>& model,
+                                                                const ModelConfigDesc& model_desc,
+                                                                const ov::genai::Tokenizer& tokenizer,
+                                                                const std::string& device,
+                                                                const ov::AnyMap& properties,
+                                                                const ov::genai::GenerationConfig& generation_config) {
+    auto properties_copy = properties;
+    const auto pipeline_mode = pop_or_default(properties_copy, "NPU_PIPELINE", std::string("STATELESS"));
+    OPENVINO_ASSERT(pipeline_mode == "STATELESS" || pipeline_mode == "STATEFUL",
+                    "Only STATELESS and STATEFUL NPU_PIPELINE modes are supported!");
+    if (pipeline_mode == "STATEFUL") {
+        return std::make_unique<ov::genai::static_llm::StatefulLLMPipeline>(model,
+                                                                            model_desc,
+                                                                            tokenizer,
+                                                                            device,
+                                                                            properties_copy,
+                                                                            generation_config);
+    }
+    return std::make_unique<ov::genai::static_llm::StatelessLLMPipeline>(model,
+                                                                         model_desc,
+                                                                         tokenizer,
+                                                                         device,
+                                                                         properties_copy,
+                                                                         generation_config);
+}
 } // namespace static_llm
 } // namespace genai
 } // namespace ov
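With the new factory overload in place, the stateful NPU path should be selectable through the NPU_PIPELINE property it pops above. A sketch under that assumption (STATELESS stays the default; the model path is illustrative):

```cpp
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Request the stateful single-request NPU pipeline instead of the
    // default stateless one. The value is popped before compilation, so
    // it never reaches the NPU plugin itself.
    ov::genai::LLMPipeline pipe("models/llama", "NPU",
                                ov::AnyMap{{"NPU_PIPELINE", std::string("STATEFUL")}});
}
```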
8 changes: 7 additions & 1 deletion src/cpp/src/llm_pipeline_static.hpp
@@ -26,6 +26,13 @@ struct LLMPipelineFactory {
     static std::unique_ptr<LLMPipelineImplBase> create(const std::filesystem::path& path,
                                                        const std::string& device,
                                                        const ov::AnyMap& config);
+
+    static std::unique_ptr<LLMPipelineImplBase> create(const std::shared_ptr<ov::Model>& model,
+                                                       const ModelConfigDesc& model_desc,
+                                                       const ov::genai::Tokenizer& tokenizer,
+                                                       const std::string& device,
+                                                       const ov::AnyMap& properties,
+                                                       const ov::genai::GenerationConfig& generation_config = {});
 };

 class StatefulLLMPipeline : public LLMPipelineImplBase {
@@ -48,7 +55,6 @@ class StatefulLLMPipeline : public LLMPipelineImplBase {

     std::shared_ptr<ov::CompiledModel> setupAndCompileModel(
         const std::shared_ptr<ov::Model>& model,
-        const std::string& device,
         const ModelConfigDesc& model_desc,
         ov::AnyMap& pipeline_config);
