diff --git a/vllm/commit_id.py b/vllm/commit_id.py new file mode 100644 index 0000000000000..42f7c01c97b2e --- /dev/null +++ b/vllm/commit_id.py @@ -0,0 +1 @@ +__commit__ = "273da1dab05e25a8fae5ac22cb18eb3785fe2a14" diff --git a/vllm/core/block/prefix_caching_block.py b/vllm/core/block/prefix_caching_block.py index 18b7930022665..f4a4169ecda39 100644 --- a/vllm/core/block/prefix_caching_block.py +++ b/vllm/core/block/prefix_caching_block.py @@ -963,7 +963,7 @@ def _update_seq_hashes(self, seq: Sequence) -> None: # NOTE: If there are any factors affecting the block besides # token_ids, they should be added as input to contextual_hash. - contextual_hash = seq.hash_of_block_v2() + contextual_hash = seq.contextual_hash_of_block() # This has to be kept in sync with the allocator's hash # calculation. diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 23297e5dc21d2..0689282425514 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -153,7 +153,7 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable: if seq.get_token_ids(): # NOTE: If there are any factors affecting the block besides # token_ids, they should be added as input to contextual_hash. - contextual_hash = seq.hash_of_block_v2() + contextual_hash = seq.contextual_hash_of_block() # Add blocks to the block table only if the sequence is non empty. block_table.allocate(token_ids=seq.get_token_ids(), @@ -243,7 +243,7 @@ def append_slots( token_ids=block_table.get_unseen_token_ids(seq.get_token_ids()), num_lookahead_slots=num_lookahead_slots, num_computed_slots=seq.data.get_num_computed_tokens(), - contextual_hash=seq.hash_of_block_v2(), + contextual_hash=seq.contextual_hash_of_block(), ) # Return any new copy-on-writes. new_cows = self.block_allocator.clear_copy_on_writes() diff --git a/vllm/sequence.py b/vllm/sequence.py index d9ffe8146fdce..79ddea236a165 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -527,10 +527,10 @@ def hash_of_block(self, logical_idx: int) -> int: hashed_tokens = self.data.get_prefix_token_ids(num_tokens) return hash((hashed_tokens, self.lora_int_id)) - def hash_of_block_v2(self) -> int: - # This function is introduced for BlockSpaceManagerV2 and is used with - # prefix caching mode. The final block hash is determined by applying - # token_ids in PrefixCachingBlock under BlockSpaceManagerV2. + def contextual_hash_of_block(self) -> int: + # This function computes a contextual hash for a block, specifically + # designed for prefix caching mode. The final block hash is determined + # by applying token_ids in PrefixCachingBlock. # NOTE: If there are additional factors influencing the block aside from # token_ids, include them as input parameters to the hash.