Expand supported attention head sizes (#752)
There's no reason for the current attention head size restrictions - we can theoretically support any size with the current implementations. This patch fixes that.
kzawora-intel authored Jan 29, 2025
1 parent 446eab2 commit 2d152ed
Showing 1 changed file with 1 addition and 1 deletion.
vllm/attention/ops/hpu_paged_attn.py (1 addition, 1 deletion)

@@ -28,7 +28,7 @@ class HPUPagedAttention:
 
     @staticmethod
     def get_supported_head_sizes() -> List[int]:
-        return [64, 80, 96, 112, 128, 256]
+        return list(range(1, 257))
 
     @staticmethod
     def get_kv_cache_shape(
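For illustration, a minimal usage sketch of the patched method: only the return value of get_supported_head_sizes() comes from this commit; the check_head_size() helper and its error message are hypothetical, standing in for however a backend might validate a model's head dimension.

# Usage sketch: check_head_size() is hypothetical and not part of this commit;
# get_supported_head_sizes() mirrors the patched return value.
from typing import List


class HPUPagedAttention:

    @staticmethod
    def get_supported_head_sizes() -> List[int]:
        # After this patch, every head size from 1 to 256 is reported as supported.
        return list(range(1, 257))


def check_head_size(head_size: int) -> None:
    # Hypothetical helper: reject head sizes outside the supported range.
    supported = HPUPagedAttention.get_supported_head_sizes()
    if head_size not in supported:
        raise ValueError(f"Head size {head_size} is not supported "
                         f"(supported: {supported[0]}-{supported[-1]}).")


check_head_size(128)  # previously supported, still accepted
check_head_size(40)   # newly accepted under the expanded range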
