Fix bug: Handle the case where the metrics have not yet been initialized
sfc-gh-mkeralapura committed Aug 2, 2024
1 parent 2d90947 commit 921ef53
Showing 3 changed files with 16 additions and 4 deletions.
5 changes: 4 additions & 1 deletion vllm/core/scheduler.py
@@ -1054,7 +1054,10 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
         # swapped out. This is not added to the ones waiting on the queue and never scheduled.
         # This will help estimate if the scheduler is a significant component in the e2e latency.
         for seq_group in self.running + self.swapped:
-            seq_group.metrics.scheduler_time += scheduler_time
+            if seq_group.metrics.scheduler_time is not None:
+                seq_group.metrics.scheduler_time += scheduler_time
+            else:
+                seq_group.metrics.scheduler_time = scheduler_time
 
         return seq_group_metadata_list, scheduler_outputs
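For readers skimming the change, the hunk above reduces to a small None-safe accumulation pattern. The sketch below is illustrative only; the class and function names are hypothetical stand-ins, not vLLM APIs:

from dataclasses import dataclass
from typing import Optional

@dataclass
class TimingMetrics:
    # Mirrors a metric field that stays uninitialized (None) until first measured.
    scheduler_time: Optional[float] = None

def add_scheduler_time(metrics: TimingMetrics, delta: float) -> None:
    # Initialize the metric on first use, accumulate on subsequent calls.
    if metrics.scheduler_time is None:
        metrics.scheduler_time = delta
    else:
        metrics.scheduler_time += delta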

10 changes: 8 additions & 2 deletions vllm/engine/llm_engine.py
@@ -815,8 +815,14 @@ def _process_model_outputs(
             seq_group.update_num_computed_tokens(
                 scheduled_seq_group.token_chunk_size)
             for o in outputs:
-                seq_group.metrics.model_forward_time += o.model_forward_time
-                seq_group.metrics.model_execute_time += o.model_execute_time
+                if seq_group.metrics.model_forward_time is not None:
+                    seq_group.metrics.model_forward_time += o.model_forward_time
+                else:
+                    seq_group.metrics.model_forward_time = o.model_forward_time
+                if seq_group.metrics.model_execute_time is not None:
+                    seq_group.metrics.model_execute_time += o.model_execute_time
+                else:
+                    seq_group.metrics.model_execute_time = o.model_execute_time
             if self.model_config.embedding_mode:
                 self._process_sequence_group_outputs(seq_group, outputs)
                 continue
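The same initialize-or-accumulate branch is repeated here for two fields. A hedged sketch of a small helper that expresses that logic once (a hypothetical refactor for illustration, not part of this commit):

from typing import Optional

def accumulate(total: Optional[float], delta: float) -> float:
    # Treat an uninitialized (None) running total as zero before adding.
    return delta if total is None else total + delta

# Illustrative usage, mirroring the shape of the change above:
# seq_group.metrics.model_forward_time = accumulate(
#     seq_group.metrics.model_forward_time, o.model_forward_time)
# seq_group.metrics.model_execute_time = accumulate(
#     seq_group.metrics.model_execute_time, o.model_execute_time)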
5 changes: 4 additions & 1 deletion vllm/worker/worker_base.py
@@ -275,7 +275,10 @@ def execute_model(
             model_input, self.kv_cache[worker_input.virtual_engine]
             if self.kv_cache is not None else None, intermediate_tensors,
             num_steps)
-        output[0].model_execute_time = time.time() - start_time
+        end_time = time.time()
+        if output is not None:
+            for o in output:
+                o.model_execute_time = end_time - start_time
         if not get_pp_group().is_last_rank:
             # output is IntermediateTensors
             get_pp_group().send_tensor_dict(output.tensors)
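The worker-side change replaces the single output[0] assignment with a loop that stamps every output and tolerates a missing result. A minimal sketch of that timing pattern under assumed names (run_model and Output are hypothetical, not vLLM APIs):

import time
from dataclasses import dataclass
from typing import Callable, List, Optional

@dataclass
class Output:
    model_execute_time: Optional[float] = None

def timed_execute(run_model: Callable[[], Optional[List[Output]]]) -> Optional[List[Output]]:
    start_time = time.time()
    outputs = run_model()  # may return None if nothing was produced
    end_time = time.time()
    if outputs is not None:
        # Stamp the same wall-clock duration on every output, not just the first.
        for o in outputs:
            o.model_execute_time = end_time - start_time
    return outputs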
