diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 5a73c6ee02e0c..0aa367a173b6c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -425,7 +425,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parser.add_argument('--block-size', type=int, default=EngineArgs.block_size, - choices=[8, 16, 32, 64, 128], + choices=[8, 16, 32], help='Token block size for contiguous chunks of ' 'tokens. This is ignored on neuron devices and ' 'set to max-model-len')