From ad0d567e1cdc77aff435b20bac918bfd0f55db0a Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Fri, 3 Jan 2025 18:25:02 -0500 Subject: [PATCH] [V1] Chore: cruft removal (#11724) --- vllm/entrypoints/llm.py | 2 -- vllm/v1/engine/core_client.py | 2 -- vllm/v1/engine/llm_engine.py | 4 ---- vllm/v1/engine/processor.py | 3 --- 4 files changed, 11 deletions(-) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 7c0de3b3e5481..e48fd1a4fa5e9 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -225,8 +225,6 @@ def __init__( # Logic to switch between engines is done at runtime instead of import # to avoid import order issues self.engine_class = self.get_engine_class() - - # TODO(rob): enable mp by default (issue with fork vs spawn) self.llm_engine = self.engine_class.from_engine_args( engine_args, usage_context=UsageContext.LLM_CLASS) diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index 6a40c961fc1d7..a4a45ae05ff9e 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -94,8 +94,6 @@ class InprocClient(EngineCoreClient): * pushes EngineCoreRequest directly into the EngineCore * pulls EngineCoreOutputs by stepping the EngineCore - - TODO: support asyncio-mode for debugging. """ def __init__(self, *args, **kwargs): diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 1f49de67d7493..0bd9b52c9be82 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -42,8 +42,6 @@ def __init__( use_cached_outputs: bool = False, multiprocess_mode: bool = False, ) -> None: - - # TODO: Can we avoid this? self.model_config = vllm_config.model_config # Tokenizer (+ ensure liveness if running in another process). @@ -179,8 +177,6 @@ def step(self) -> List[RequestOutput]: return request_outputs - # TODO(rob): Can we get rid of these? - def get_model_config(self): return self.model_config diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 905d3d1fc3e1c..c0f6cfab4865c 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -49,9 +49,6 @@ def __init__( cache_config.enable_prefix_caching self.mm_hasher = MMHasher() - # TODO: run in an ThreadpoolExecutor or BackgroundProcess. - # This ideally should releases the GIL, so we should not block the - # asyncio loop while this is running. def process_inputs( self, request_id: str,