diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index a6565ce9000a8..404c5b70b6274 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -1558,7 +1558,7 @@ def warmup_scenario(self, seqs = [ self.create_dummy_seq_group_metadata( i, - b * self.block_size - 1, + b * self.block_size - 2, is_prompt, lora_request=dummy_lora_requests_per_seq[i] if dummy_lora_requests_per_seq else None, @@ -1582,7 +1582,7 @@ def warmup_scenario(self, self.execute_model(inputs, kv_caches, warmup_mode=True, - num_steps=2, + num_steps=3, seqs=seqs) inputs = dataclasses.replace(inputs, is_first_multi_step=False,