Skip to content

Commit

Permalink
review comments
Browse files: browse the repository at this point in the history
  • Loading branch information
LucasWilkinson committed Oct 7, 2024
1 parent 1792c9f commit a7367ed
Showing 1 changed file with 33 additions and 8 deletions.
41 changes: 33 additions & 8 deletions examples/offline_profile.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,16 @@
import argparse
import inspect
import json
import sys
from argparse import RawTextHelpFormatter
from dataclasses import asdict, dataclass
from typing import Optional

import torch

from vllm import LLM, SamplingParams
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.profiler import layerwise_profile
from vllm.utils import FlexibleArgumentParser

BATCH_SIZE_DEFAULT = 1
PROMPT_LEN_DEFAULT = 256
Expand Down Expand Up @@ -194,7 +196,31 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser = FlexibleArgumentParser(description="""
Profile a model
example:
```
python examples/offline_profile.py \\
--model neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8 --batch-size 4 \\
--prompt-len 512 --max-num-batched-tokens 8196 --json Llama31-8b-FP8
```
then you can use various tools to analyze the json output
terminal ascii tables:
```
python tools/profiler/print_layerwise_table.py \\
--json-trace Llama31-8b-FP8.json --phase prefill --table summary
```
or create matplotlib stacked bar charts:
```
python tools/profiler/visualize_layerwise_profile.py \\
--json-trace Llama31-8b-FP8.json \\
--output-directory profile_breakdown --plot-metric pct_cuda_time
```
""",
formatter_class=RawTextHelpFormatter)

parser.add_argument(
"--model",
Expand All @@ -221,12 +247,11 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
type=str,
default=None,
help="Export the results as a json file. This should be the filename")
parser.add_argument(
"--quantization",
"-q",
type=str,
choices=[*QUANTIZATION_METHODS, None],
default=None)
parser.add_argument("--quantization",
"-q",
type=str,
choices=[*QUANTIZATION_METHODS, None],
default=None)
parser.add_argument("--dtype",
type=str,
default='auto',
Expand Down

0 comments on commit a7367ed

Please sign in to comment.