diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 1d2a8782a96f1..6a0f55101e660 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -104,6 +104,8 @@ def apply_weights(self, if bias is not None: return F.linear(x, weight) + bias return F.linear(x, weight) + elif bias is not None: + return F.linear(x, weight, bias) return tgemm.mm(x, weight)