Skip to content

Commit

Permalink
[Core] Make scheduling policy settable via EngineArgs (vllm-project#8956)
Browse files: browse the repository at this point in the history
  • Loading branch information
schoennenbeck authored Sep 30, 2024
1 parent 2ae25f7 commit be76e5a
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import dataclasses
import json
from dataclasses import dataclass
from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple,
Type, Union)
from typing import (TYPE_CHECKING, Any, Dict, List, Literal, Mapping, Optional,
Tuple, Type, Union)

import torch

Expand Down Expand Up @@ -177,6 +177,7 @@ class EngineArgs:
disable_async_output_proc: bool = False
override_neuron_config: Optional[Dict[str, Any]] = None
mm_processor_kwargs: Optional[Dict[str, Any]] = None
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"

def __post_init__(self):
if self.tokenizer is None:
Expand Down Expand Up @@ -797,6 +798,16 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None,
help="override or set neuron device configuration.")

parser.add_argument(
'--scheduling-policy',
choices=['fcfs', 'priority'],
default="fcfs",
help='The scheduling policy to use. "fcfs" (first come first served'
', i.e. requests are handled in order of arrival; default) '
'or "priority" (requests are handled based on given '
'priority (lower value means earlier handling) and time of '
'arrival deciding any ties).')

return parser

@classmethod
Expand Down Expand Up @@ -1011,6 +1022,7 @@ def create_engine_config(self) -> EngineConfig:
multi_step_stream_outputs=self.multi_step_stream_outputs,
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
and parallel_config.use_ray),
policy=self.scheduling_policy,
)
lora_config = LoRAConfig(
max_lora_rank=self.max_lora_rank,
Expand Down

0 comments on commit be76e5a

Please sign in to comment.