vllm-project · robertgshaw2-neuralmagic · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
@@ -5,7 +5,6 @@
 import threading
 import time
 from multiprocessing.process import BaseProcess
-from multiprocessing.sharedctypes import Synchronized
 from typing import List, Tuple, Type, Union
 
 import zmq
@@ -133,13 +132,9 @@ def __init__(
         input_path: str,
         output_path: str,
         ready_path: str,
-        should_shutdown: Synchronized,
     ):
         super().__init__(vllm_config, executor_class, usage_context)
 
-        # Signal from main process to shutdown (multiprocessing.Value).
-        self.should_shutdown = should_shutdown
-
         # Background Threads and Queues for IO. These enable us to
         # overlap ZMQ socket IO with GPU since they release the GIL,
         # and to overlap some serialization/deserialization with the
@@ -195,7 +190,6 @@ def make_engine_core_process(
         input_path: str,
         output_path: str,
         ready_path: str,
-        should_shutdown: Synchronized,
     ) -> BaseProcess:
         # The current process might have CUDA context,
         # so we need to spawn a new process.
@@ -210,7 +204,6 @@ def make_engine_core_process(
             "vllm_config": vllm_config,
             "executor_class": executor_class,
             "usage_context": usage_context,
-            "should_shutdown": should_shutdown
         }
         # Run EngineCore busy loop in background process.
         proc = context.Process(target=EngineCoreProc.run_engine_core,
@@ -260,8 +253,8 @@ def signal_handler(signum, frame):
     def run_busy_loop(self):
         """Core busy loop of the EngineCore."""
 
-        # Loop until we get a shutdown signal.
-        while not self.should_shutdown:
+        # Loop until process is sent a SIGINT or SIGTERM
+        while True:
             # 1) Poll the input queue until there is work to do.
             if not self.scheduler.has_unfinished_requests():
                 while True:
@@ -272,8 +265,6 @@ def run_busy_loop(self):
                     except queue.Empty:
                         self._log_stats()
                         logger.debug("EngineCore busy loop waiting.")
-                        if self.should_shutdown:
-                            return
                     except BaseException:
                         raise
 

@@ -1,5 +1,4 @@
 import atexit
-import multiprocessing
 from typing import List, Union
 
 import msgspec
@@ -149,21 +148,16 @@ def __init__(
         self.input_socket.bind(input_path)
 
         # Start EngineCore in background process.
-        self.should_shutdown = multiprocessing.Value('b', False, lock=False)
         self.proc = EngineCoreProc.make_engine_core_process(
             *args,
             input_path=input_path,
             output_path=output_path,
             ready_path=ready_path,
-            should_shutdown=self.should_shutdown,
             **kwargs,
         )
         atexit.register(self.shutdown)
 
     def shutdown(self):
-        # Send shutdown signal to background process.
-        self.should_shutdown = True
-
         # Shut down the zmq context.
         self.ctx.destroy(linger=0)