From cfba4def5d422dfbafbc21b8f695bdcb2295aa19 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 12 Aug 2024 09:58:28 -0700
Subject: [PATCH] [Bugfix] Fix logit soft cap in flash-attn backend (#7425)

---
 vllm/attention/backends/flash_attn.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 5710aa1930b79..160bf2307fbf5 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -563,6 +563,7 @@ def forward(
                 softmax_scale=self.scale,
                 causal=True,
                 alibi_slopes=self.alibi_slopes,
+                softcap=self.logits_soft_cap,
             ).squeeze(1)
 
         # Reshape the output tensor.
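
Note for context: a logits soft cap bounds the pre-softmax attention scores with a tanh
squash, so leaving softcap unset silently disables the cap for models trained with one
(e.g. Gemma 2) and skews the resulting attention distribution. Below is a minimal Python
sketch of that capping, assuming the usual tanh formulation; the helper name, tensor
shapes, and cap value are illustrative only and are not part of this patch.

    import torch

    def soft_cap_scores(scores: torch.Tensor, logits_soft_cap: float | None) -> torch.Tensor:
        # Squash raw attention scores into (-cap, +cap); pass through when no cap is set,
        # which is effectively what the backend did before this fix.
        if not logits_soft_cap:
            return scores
        return logits_soft_cap * torch.tanh(scores / logits_soft_cap)

    # Illustrative usage with made-up shapes: (batch, heads, q_len, k_len).
    scores = 50.0 * torch.randn(1, 8, 16, 16)             # large pre-softmax logits
    capped = soft_cap_scores(scores, logits_soft_cap=30.0)
    assert capped.abs().max() <= 30.0                      # values now bounded by the cap
    probs = torch.softmax(capped, dim=-1)                  # attention weights over capped scores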