Skip to content

Commit

Permalink
Fix incorrect logits processing when slice matmul is not applied (#1217)
Browse files Browse the repository at this point in the history
  • Loading branch information
sbalandi authored Nov 16, 2024
1 parent 96bcffe commit 937d447
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 7 deletions.
5 changes: 0 additions & 5 deletions src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,18 +269,13 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
SequenceGroup::Ptr sequence_group;
if (is_chat_conversation && !m_is_cache_empty) {
sequence_group = std::make_shared<SequenceGroup>(request_id, m_tokenized_chat_history.input_ids, config, block_size, enable_prefix_caching);
sequence_group->update_processed_tokens_num(m_tokenized_chat_history.input_ids.get_shape().at(1) - 1);
} else {
size_t seq_len = input_ids.get_shape().at(1);
size_t batch_offset = request_id * seq_len;
const int64_t* prompt_start = input_ids.data<const int64_t>() + batch_offset;
std::vector<int64_t> tokenized_prompt(prompt_start, prompt_start + seq_len);
// in case of multi batch scenario, remove eos_token_id at start of prompt
auto real_prompt_start = std::find_if(tokenized_prompt.begin(), tokenized_prompt.end(), [&config](int64_t token) { return token != config.eos_token_id; });
tokenized_prompt.erase(tokenized_prompt.begin(), real_prompt_start);

sequence_group = std::make_shared<SequenceGroup>(request_id, tokenized_prompt, config, block_size, enable_prefix_caching);
sequence_group->update_processed_tokens_num(tokenized_prompt.size() - 1);
}

sequence_group->set_sequence_group_ptr(sequence_group);
Expand Down
5 changes: 4 additions & 1 deletion src/cpp/src/lm_encoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,12 @@ std::pair<EncodedResults, int32_t> get_lm_encoded_results(
auto logits = m_llm.get_tensor("logits");

int64_t sequence_len = logits.get_shape().at(1);
for (auto& sequence_group : sequence_groups)
for (auto& sequence_group : sequence_groups) {
sequence_group->update_processed_tokens_num(sequence_group->get_prompt_len() - sequence_len);
sequence_group->schedule_tokens(sequence_len);

}

std::map<size_t, size_t> beam_offets;
for (size_t i = 0; i < sequence_groups.size(); i++)
beam_offets.insert({sequence_groups.at(i)->get_request_id(), i});
Expand Down
1 change: 0 additions & 1 deletion src/cpp/src/visual_language/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
std::fill_n(prompt_ids.data<int64_t>(), prompt_ids.get_size(), 0);

SequenceGroup::Ptr sequence_group = std::make_shared<SequenceGroup>(request_id, prompt_ids, generation_config, block_size, enable_prefix_caching);
sequence_group->update_processed_tokens_num(history_size);
sequence_group->set_sequence_group_ptr(sequence_group);
requests.push_back(sequence_group);

Expand Down

0 comments on commit 937d447

Please sign in to comment.