From aa6954fb686ecbbc6bfba25b51f836f7c9401e2f Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Fri, 3 Jan 2025 19:06:14 +0000 Subject: [PATCH] updated --- vllm/v1/engine/async_llm.py | 16 ---------------- vllm/v1/engine/core_client.py | 19 ++++++++++++++++++- vllm/v1/engine/llm_engine.py | 1 - 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index ff7a0c28dd91a..564d8a8343bef 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -1,6 +1,5 @@ import asyncio import os -import signal from typing import AsyncGenerator, Dict, List, Mapping, Optional, Type, Union from vllm.config import ModelConfig, VllmConfig @@ -42,21 +41,6 @@ def __init__( start_engine_loop: bool = True, ) -> None: - # The child processes will send SIGQUIT when unrecoverable - # errors happen. We kill the process tree here so that the - # stack trace is very evident. - # TODO: rather than killing the main process, we should - # figure out how to raise an AsyncEngineDeadError and - # handle at the API server level so we can return a better - # error code to the clients calling VLLM. - def sigquit_handler(signum, frame): - logger.fatal( - "AsyncLLM got SIGQUIT from worker processes, shutting " - "down. See stack trace above for root cause issue.") - kill_process_tree(os.getpid()) - - signal.signal(signal.SIGQUIT, sigquit_handler) - assert start_engine_loop self.log_requests = log_requests diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index e009f3448bf69..f52ecb29fde40 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -1,3 +1,5 @@ +import os +import signal import weakref from abc import ABC, abstractmethod from typing import List, Type @@ -8,7 +10,8 @@ from vllm.config import VllmConfig from vllm.logger import init_logger -from vllm.utils import get_open_zmq_ipc_path, make_zmq_socket +from vllm.utils import (get_open_zmq_ipc_path, make_zmq_socket, + kill_process_tree) from vllm.v1.engine import (EngineCoreOutput, EngineCoreOutputs, EngineCoreProfile, EngineCoreRequest, EngineCoreRequestType, EngineCoreRequestUnion) @@ -134,6 +137,20 @@ def __init__( executor_class: Type[Executor], log_stats: bool = False, ): + # The child processes will send SIGQUIT when unrecoverable + # errors happen. We kill the process tree here so that the + # stack trace is very evident. + # TODO(rob): rather than killing the main process, we should + # figure out how to raise an AsyncEngineDeadError and + # handle at the API server level so we can return a better + # error code to the clients calling VLLM. + def sigquit_handler(signum, frame): + logger.fatal( + "Got SIGQUIT from worker processes, shutting " + "down. See stack trace above for root cause issue.") + kill_process_tree(os.getpid()) + signal.signal(signal.SIGQUIT, sigquit_handler) + # Serialization setup. self.encoder = PickleEncoder() self.decoder = msgspec.msgpack.Decoder(EngineCoreOutputs) diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 1f49de67d7493..016ed7438c5a2 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -42,7 +42,6 @@ def __init__( use_cached_outputs: bool = False, multiprocess_mode: bool = False, ) -> None: - # TODO: Can we avoid this? self.model_config = vllm_config.model_config