Skip to content

Commit

Permalink
wa for blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
iefode committed Jul 23, 2024
1 parent 7998dd6 commit e66aae9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@ class SpeculativeDecodingPipeline {
assisting_scheduler_config.cache_size = assisted_cache_size;
}
model_pipeline = ContinuousBatchingPipeline(models_path, model_scheduler_config, device, plugin_config);
// assisting_pipeline = ContinuousBatchingPipeline(assisting_model_path, assisting_scheduler_config, device, plugin_config);
assisting_pipeline = ContinuousBatchingPipeline(models_path, assisting_scheduler_config, device, plugin_config);
assisting_pipeline = ContinuousBatchingPipeline(assisting_model_path, assisting_scheduler_config, device, plugin_config);
// assisting_pipeline = ContinuousBatchingPipeline(models_path, assisting_scheduler_config, device, plugin_config);
// assisting_pipeline.set_to_free_sequences(false);
m_tokenizer = std::make_shared<ov::genai::Tokenizer>(models_path);
}

void step() {
ContinuousBatchingPipeline::GeneratedTokensMap candidate_sequences;
std::cout << "K: " << k << std::endl;
if (is_speculative_mode) {
// generate candidates using small model
for (size_t i = 0; i < k; ++i) {
Expand Down
5 changes: 5 additions & 0 deletions src/cpp/continuous_batching/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ class BlockManager {
OPENVINO_ASSERT(can_allocate_blocks(num_logical_blocks - num_physical_blocks));
allocate(seq_id, num_logical_blocks - num_physical_blocks);
} else {
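// TODO(iefode): revisit — when more physical blocks are held than logical blocks, the surplus is freed before the equality assert below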
if (num_physical_blocks > num_logical_blocks) {
free_sequence_partially(seq_id, num_physical_blocks - num_logical_blocks);
num_physical_blocks = block_table.size();
}
OPENVINO_ASSERT(num_logical_blocks == num_physical_blocks, "A number of physical and logic blocks must be the same in this code path");
KVCacheBlock::Ptr last_block = block_table.back();

Expand Down

0 comments on commit e66aae9

Please sign in to comment.