From aa6954fb686ecbbc6bfba25b51f836f7c9401e2f Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Fri, 3 Jan 2025 19:06:14 +0000
Subject: [PATCH] updated

---
 vllm/v1/engine/async_llm.py   | 16 ----------------
 vllm/v1/engine/core_client.py | 19 ++++++++++++++++++-
 vllm/v1/engine/llm_engine.py  |  1 -
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
index ff7a0c28dd91a..564d8a8343bef 100644
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -1,6 +1,5 @@
 import asyncio
 import os
-import signal
 from typing import AsyncGenerator, Dict, List, Mapping, Optional, Type, Union
 
 from vllm.config import ModelConfig, VllmConfig
@@ -42,21 +41,6 @@ def __init__(
         start_engine_loop: bool = True,
     ) -> None:
 
-        # The child processes will send SIGQUIT when unrecoverable
-        # errors happen. We kill the process tree here so that the
-        # stack trace is very evident.
-        # TODO: rather than killing the main process, we should
-        # figure out how to raise an AsyncEngineDeadError and
-        # handle at the API server level so we can return a better
-        # error code to the clients calling VLLM.
-        def sigquit_handler(signum, frame):
-            logger.fatal(
-                "AsyncLLM got SIGQUIT from worker processes, shutting "
-                "down. See stack trace above for root cause issue.")
-            kill_process_tree(os.getpid())
-
-        signal.signal(signal.SIGQUIT, sigquit_handler)
-
         assert start_engine_loop
 
         self.log_requests = log_requests
diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
index e009f3448bf69..f52ecb29fde40 100644
--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -1,3 +1,5 @@
+import os
+import signal
 import weakref
 from abc import ABC, abstractmethod
 from typing import List, Type
@@ -8,7 +10,8 @@
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.utils import get_open_zmq_ipc_path, make_zmq_socket
+from vllm.utils import (get_open_zmq_ipc_path, make_zmq_socket,
+                        kill_process_tree)
 from vllm.v1.engine import (EngineCoreOutput, EngineCoreOutputs,
                             EngineCoreProfile, EngineCoreRequest,
                             EngineCoreRequestType, EngineCoreRequestUnion)
@@ -134,6 +137,20 @@ def __init__(
         executor_class: Type[Executor],
         log_stats: bool = False,
     ):
+        # The child processes will send SIGQUIT when unrecoverable
+        # errors happen. We kill the process tree here so that the
+        # stack trace is very evident.
+        # TODO(rob): rather than killing the main process, we should
+        # figure out how to raise an AsyncEngineDeadError and
+        # handle at the API server level so we can return a better
+        # error code to the clients calling VLLM.
+        def sigquit_handler(signum, frame):
+            logger.fatal(
+                "Got SIGQUIT from worker processes, shutting "
+                "down. See stack trace above for root cause issue.")
+            kill_process_tree(os.getpid())
+        signal.signal(signal.SIGQUIT, sigquit_handler)
+
         # Serialization setup.
         self.encoder = PickleEncoder()
         self.decoder = msgspec.msgpack.Decoder(EngineCoreOutputs)
diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py
index 1f49de67d7493..016ed7438c5a2 100644
--- a/vllm/v1/engine/llm_engine.py
+++ b/vllm/v1/engine/llm_engine.py
@@ -42,7 +42,6 @@ def __init__(
         use_cached_outputs: bool = False,
         multiprocess_mode: bool = False,
     ) -> None:
-
         # TODO: Can we avoid this?
         self.model_config = vllm_config.model_config