diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 1f49de67d7493..c0d29c7b92bb0 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -97,6 +97,7 @@ def from_engine_args( logger.debug("Enabling multiprocessing for LLMEngine.") enable_multiprocessing = True + print(f"{enable_multiprocessing=}") # Create the LLMEngine. return cls(vllm_config=vllm_config, executor_class=executor_class, diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index a7723e4e85264..fcef37371a6b9 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -34,8 +34,6 @@ def __init__( rank: int, distributed_init_method: str, ): - - self.i = 0 # TODO: use WorkerBase.__init__(self, vllm_config=vllm_config) self.vllm_config = vllm_config @@ -203,10 +201,6 @@ def execute_model( self, scheduler_output: "SchedulerOutput", ) -> ModelRunnerOutput: - if self.rank == 0 and self.i == 10: - raise ValueError("ERROR FROM HERE :)") - self.i += 1 - output = self.model_runner.execute_model(scheduler_output) return output if self.rank == 0 else None