Remove swapped from the set over which we count the scheduler time

Snowflake-Labs · Aug 2, 2024 · 055cb66
1 parent 3a2f2c1
commit 055cb66
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 4 deletions.
diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
@@ -1050,10 +1050,9 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
                 scheduled_seq_group.seq_group)
 
         scheduler_time = time.time() - scheduler_start_time
-        # Add this to scheduler time to all the sequences that are either currently running or
-        # swapped out. This is not added to the ones waiting on the queue and never scheduled.
+        # Add this scheduler time to all the sequences that are currently running.
         # This will help estimate if the scheduler is a significant component in the e2e latency.
-        for seq_group in self.running + self.swapped:
+        for seq_group in self.running:
             if seq_group.metrics.scheduler_time is not None:
                 seq_group.metrics.scheduler_time += scheduler_time
             else:

diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py
@@ -270,7 +270,6 @@ def execute_model(
         if not get_pp_group().is_first_rank:
             intermediate_tensors = IntermediateTensors(
                 get_pp_group().recv_tensor_dict())
-
         output = self.model_runner.execute_model(
             model_input, self.kv_cache[worker_input.virtual_engine]
             if self.kv_cache is not None else None, intermediate_tensors,