Skip to content

Commit

Permalink
[V1][Core] Remove should_shutdown to simplify core process termination (vllm-project#11113)
Browse files Browse the repository at this point in the history

Signed-off-by: Tyler Michael Smith <[email protected]>
  • Loading branch information
tlrmchlsmth authored and BKitor committed Dec 30, 2024
1 parent 9326152 commit b5fa460
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 17 deletions.
13 changes: 2 additions & 11 deletions vllm/v1/engine/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import threading
import time
from multiprocessing.process import BaseProcess
from multiprocessing.sharedctypes import Synchronized
from typing import List, Tuple, Type, Union

import zmq
Expand Down Expand Up @@ -133,13 +132,9 @@ def __init__(
input_path: str,
output_path: str,
ready_path: str,
should_shutdown: Synchronized,
):
super().__init__(vllm_config, executor_class, usage_context)

# Signal from main process to shutdown (multiprocessing.Value).
self.should_shutdown = should_shutdown

# Background Threads and Queues for IO. These enable us to
# overlap ZMQ socket IO with GPU since they release the GIL,
# and to overlap some serialization/deserialization with the
Expand Down Expand Up @@ -195,7 +190,6 @@ def make_engine_core_process(
input_path: str,
output_path: str,
ready_path: str,
should_shutdown: Synchronized,
) -> BaseProcess:
# The current process might have CUDA context,
# so we need to spawn a new process.
Expand All @@ -210,7 +204,6 @@ def make_engine_core_process(
"vllm_config": vllm_config,
"executor_class": executor_class,
"usage_context": usage_context,
"should_shutdown": should_shutdown
}
# Run EngineCore busy loop in background process.
proc = context.Process(target=EngineCoreProc.run_engine_core,
Expand Down Expand Up @@ -260,8 +253,8 @@ def signal_handler(signum, frame):
def run_busy_loop(self):
"""Core busy loop of the EngineCore."""

# Loop until we get a shutdown signal.
while not self.should_shutdown:
# Loop until process is sent a SIGINT or SIGTERM
while True:
# 1) Poll the input queue until there is work to do.
if not self.scheduler.has_unfinished_requests():
while True:
Expand All @@ -272,8 +265,6 @@ def run_busy_loop(self):
except queue.Empty:
self._log_stats()
logger.debug("EngineCore busy loop waiting.")
if self.should_shutdown:
return
except BaseException:
raise

Expand Down
6 changes: 0 additions & 6 deletions vllm/v1/engine/core_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import atexit
import multiprocessing
from typing import List, Union

import msgspec
Expand Down Expand Up @@ -149,21 +148,16 @@ def __init__(
self.input_socket.bind(input_path)

# Start EngineCore in background process.
self.should_shutdown = multiprocessing.Value('b', False, lock=False)
self.proc = EngineCoreProc.make_engine_core_process(
*args,
input_path=input_path,
output_path=output_path,
ready_path=ready_path,
should_shutdown=self.should_shutdown,
**kwargs,
)
atexit.register(self.shutdown)

def shutdown(self):
# Send shutdown signal to background process.
self.should_shutdown = True

# Shut down the zmq context.
self.ctx.destroy(linger=0)

Expand Down

0 comments on commit b5fa460

Please sign in to comment.