Skip to content

Commit

Permalink
Remove swapped from the set over which we count the scheduler time
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-mkeralapura committed Aug 2, 2024
1 parent 3a2f2c1 commit 055cb66
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 4 deletions.
5 changes: 2 additions & 3 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,10 +1050,9 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
scheduled_seq_group.seq_group)

scheduler_time = time.time() - scheduler_start_time
- # Add this to scheduler time to all the sequences that are either currently running or
- # swapped out. This is not added to the ones waiting on the queue and never scheduled.
+ # Add this to scheduler time to all the sequences that are currently running.
  # This will help estimate if the scheduler is a significant component in the e2e latency.
- for seq_group in self.running + self.swapped:
+ for seq_group in self.running:
if seq_group.metrics.scheduler_time is not None:
seq_group.metrics.scheduler_time += scheduler_time
else:
Expand Down
1 change: 0 additions & 1 deletion vllm/worker/worker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ def execute_model(
if not get_pp_group().is_first_rank:
intermediate_tensors = IntermediateTensors(
get_pp_group().recv_tensor_dict())

output = self.model_runner.execute_model(
model_input, self.kv_cache[worker_input.virtual_engine]
if self.kv_cache is not None else None, intermediate_tensors,
Expand Down

0 comments on commit 055cb66

Please sign in to comment.