diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index d3471959a1766..1cdf1c5f18ca5 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -450,8 +450,8 @@ def __init__( lora_config: Optional[LoRAConfig] = None, ) -> None: super().__init__() - # TODO keep the fused mixtral_quant codepath around as long as we don't - # support all quant_types + # TODO keep the unfused mixtral_quant-like codepath around as long as + # we don't support all quant_types self.is_compressed = isinstance(quant_config, CompressedTensorsConfig) self.use_fused_moe = ( self.is_compressed