diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py index c1223bb54b327..c2b5ef07a3b4c 100644 --- a/vllm/model_executor/models/gemma2.py +++ b/vllm/model_executor/models/gemma2.py @@ -91,7 +91,8 @@ def __init__(self, rope_theta: float, cache_config: Optional[CacheConfig] = None, quant_config: Optional[QuantizationConfig] = None, - attn_logits_soft_cap: Optional[float] = None) -> None: + attn_logits_soft_cap: Optional[float] = None, + layer_index: int = 0) -> None: super().__init__() self.layer_idx = layer_idx self.config = config @@ -195,6 +196,7 @@ def __init__( cache_config=cache_config, quant_config=quant_config, attn_logits_soft_cap=config.attn_logit_softcapping, + layer_index=layer_idx, ) self.hidden_size = config.hidden_size self.mlp = Gemma2MLP(