From e5ac6a4199fd967d2655310712cee6e642e91bd7 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Wed, 23 Oct 2024 12:40:43 -0400
Subject: [PATCH] [Bugfix] Fix divide by zero when serving Mamba models (#9617)

Signed-off-by: Tyler Michael Smith
---
 vllm/engine/llm_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 99beea932882d..167efa51e3e2f 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1612,7 +1612,7 @@ def _get_stats(self,
         # KV Cache Usage in %
         num_total_gpu = self.cache_config.num_gpu_blocks
         gpu_cache_usage_sys = 0.
-        if num_total_gpu is not None:
+        if num_total_gpu:  # Guard against both None and 0
             num_free_gpu = sum(
                 scheduler.block_manager.get_num_free_gpu_blocks()
                 for scheduler in self.scheduler)
@@ -1620,7 +1620,7 @@ def _get_stats(self,
 
         num_total_cpu = self.cache_config.num_cpu_blocks
         cpu_cache_usage_sys = 0.
-        if num_total_cpu is not None and num_total_cpu > 0:
+        if num_total_cpu:  # Guard against both None and 0
             num_free_cpu = sum(
                 scheduler.block_manager.get_num_free_cpu_blocks()
                 for scheduler in self.scheduler)