Merge branch 'main' into unique-deterministic-e2e-test-names
dbarbuzzi authored Dec 10, 2024
2 parents 9271e89 + 606aab2 commit 2eeda6b
Showing 6 changed files with 170 additions and 11 deletions.
1 change: 1 addition & 0 deletions setup.py
@@ -71,6 +71,7 @@
"pytest-mock>=3.6.0",
"pytest-rerunfailures>=13.0",
"parameterized",
"lm_eval==0.4.5",
# example test dependencies
"beautifulsoup4~=4.12.3",
"cmarkgfm~=2024.1.14",
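For an ad-hoc environment outside the packaged test extras, the same pin can presumably be installed directly from PyPI:

    pip install lm_eval==0.4.5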
8 changes: 8 additions & 0 deletions tests/e2e/vLLM/lm_eval_configs/fp8_dynamic_per_token.yaml
@@ -0,0 +1,8 @@
cadence: "weekly"
model: meta-llama/Meta-Llama-3-8B-Instruct
scheme: FP8_DYNAMIC
num_fewshot: 5
limit: 1000
task: "gsm8k"
exact_match,flexible-extract: 0.753
exact_match,strict-match: 0.753
@@ -0,0 +1,8 @@
cadence: "weekly"
model: meta-llama/Meta-Llama-3-8B-Instruct
scheme: INT8
num_fewshot: 5
limit: 250
task: "gsm8k"
exact_match,flexible-extract: 0.728
exact_match,strict-match: 0.728
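For reference, set_up() in test_lmeval.py (below) also reads several optional keys that the two configs above omit. A hypothetical sketch only; the recipe path, dataset, and expected scores here are illustrative and not part of this commit:

    cadence: "weekly"
    model: meta-llama/Meta-Llama-3-8B-Instruct
    scheme: INT8
    recipe: path/to/recipe.yaml                 # when given, used instead of a preset scheme
    dataset_id: HuggingFaceH4/ultrachat_200k    # optional calibration dataset
    dataset_split: train_sft
    save_dir: Meta-Llama-3-8B-Instruct-INT8     # optional; defaults to "<model name>-<scheme>"
    num_fewshot: 5
    limit: 250
    task: "gsm8k"
    exact_match,flexible-extract: 0.7
    exact_match,strict-match: 0.7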
20 changes: 16 additions & 4 deletions tests/e2e/vLLM/run_tests.sh
@@ -2,10 +2,22 @@

 SUCCESS=0
 
-# Parse list of configs.
-MODEL_CONFIGS="$PWD/tests/e2e/vLLM/configs"
+while getopts "c:t:" OPT; do
+  case ${OPT} in
+    c )
+      CONFIG="$OPTARG"
+      ;;
+    t )
+      TEST="$OPTARG"
+      ;;
+    \? )
+      exit 1
+      ;;
+  esac
+done
 
-for MODEL_CONFIG in "$MODEL_CONFIGS"/*
+# Parse list of configs.
+for MODEL_CONFIG in "$CONFIG"/*
 do
     LOCAL_SUCCESS=0
 
@@ -16,7 +28,7 @@ do
         -r a \
         --capture=tee-sys \
         --junitxml="test-results/e2e-$(date +%s).xml" \
-        "$PWD/tests/e2e/vLLM/test_vllm.py" || LOCAL_SUCCESS=$?
+        "$TEST" || LOCAL_SUCCESS=$?
 
     if [[ $LOCAL_SUCCESS == 0 ]]; then
         echo "=== PASSED MODEL: $MODEL_CONFIG ==="
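With the new flags, the runner can point at any config directory and test file. A plausible invocation for the lm_eval suite added in this commit, assuming the repository root as the working directory:

    bash tests/e2e/vLLM/run_tests.sh \
        -c tests/e2e/vLLM/lm_eval_configs \
        -t tests/e2e/vLLM/test_lmeval.py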
131 changes: 131 additions & 0 deletions tests/e2e/vLLM/test_lmeval.py
@@ -0,0 +1,131 @@
import os
import shutil
from pathlib import Path

import numpy
import pytest
import yaml
from loguru import logger

from llmcompressor.core import active_session
from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
from tests.examples.utils import requires_gpu_count

try:
    import lm_eval

    lm_eval_installed = True
except ImportError:
    lm_eval_installed = False
    logger.warning("lm_eval is not installed. This test will be skipped")

TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", None)


# Will run each test case in its own process through run_tests.sh
# emulating vLLM CI testing
@requires_gpu_count(1)
@pytest.mark.skipif(
    not lm_eval_installed, reason="lm eval is not installed, skipping test"
)
class TestLMEval:
    """
    The following test quantizes a model using a preset scheme or recipe,
    and then evaluates the model using LM Eval. Each test case is focused on a
    specific quantization type (e.g. W4A16 with grouped quantization,
    W4A16 with channel quantization). To add a new test case, a new config has to be
    added to the lm_eval_configs folder. The tests run on a cadence defined by the
    `cadence` field. Each config defines the model to quantize. Optionally, a dataset
    id and split can be provided for calibration. Finally, all config files must list
    a scheme. The scheme can be a preset scheme from
    https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py
    or another identifier which can be used for the particular test case. If a recipe
    is not provided, it is assumed that the scheme provided is a preset scheme and will
    be used for quantization. Otherwise, the recipe will always be used if given.
    """  # noqa: E501

    def set_up(self):
        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))

        if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
            pytest.skip("Skipping test; cadence mismatch")

        self.model = eval_config["model"]
        self.scheme = eval_config.get("scheme")
        self.dataset_id = eval_config.get("dataset_id")
        self.dataset_config = eval_config.get("dataset_config")
        self.dataset_split = eval_config.get("dataset_split")
        self.recipe = eval_config.get("recipe")
        self.quant_type = eval_config.get("quant_type")
        self.save_dir = eval_config.get("save_dir")
        self.task = eval_config.get("task")
        self.num_fewshot = eval_config.get("num_fewshot")
        self.limit = eval_config.get("limit")
        self.exact_flex = eval_config.get("exact_match,flexible-extract")
        self.exact_strict = eval_config.get("exact_match,strict-match")

        logger.info("========== RUNNING ==============")
        logger.info(self.scheme)

        self.device = "cuda:0"
        self.num_calibration_samples = 256
        self.max_seq_length = 2048

    def test_lm_eval(self):
        # Run vLLM with saved model
        self.set_up()
        if not self.save_dir:
            self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
        oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
            model=self.model,
            device=self.device,
            num_calibration_samples=self.num_calibration_samples,
            max_seq_length=self.max_seq_length,
            scheme=self.scheme,
            dataset_id=self.dataset_id,
            dataset_config=self.dataset_config,
            dataset_split=self.dataset_split,
            recipe=self.recipe,
            quant_type=self.quant_type,
        )

        logger.info("================= SAVING TO DISK ======================")
        oneshot_model.save_pretrained(self.save_dir)
        tokenizer.save_pretrained(self.save_dir)
        recipe_path = os.path.join(self.save_dir, "recipe.yaml")

        # Use the session to fetch the recipe;
        # Reset session for next test case
        session = active_session()
        recipe_yaml_str = session.get_serialized_recipe()
        with open(recipe_path, "w") as fp:
            fp.write(recipe_yaml_str)
        session.reset()

        logger.info("================= Running LM Eval ======================")

        model_args = f"pretrained={self.save_dir}"
        results = lm_eval.simple_evaluate(
            model="hf",
            model_args=model_args,
            tasks=[self.task],
            num_fewshot=self.num_fewshot,
            limit=self.limit,
            device="cuda:0",
            batch_size=100,
        )

        metrics = results["results"][self.task]
        exact_match_strict = metrics.get("exact_match,strict-match")
        exact_match_flex = metrics.get("exact_match,flexible-extract")
        logger.info("Exact Match, Strict")
        logger.info(exact_match_strict)
        logger.info("Exact Match, Flex")
        logger.info(exact_match_flex)
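        # numpy.isclose(a, b, rtol=0.05) passes when |a - b| <= atol + 0.05 * |b|,
        # so an expected score of 0.728 tolerates measured values within
        # roughly +/- 0.036 (about 0.692 to 0.764)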
        assert numpy.isclose(exact_match_strict, self.exact_strict, rtol=0.05)
        assert numpy.isclose(exact_match_flex, self.exact_flex, rtol=0.05)
        self.tear_down()

    def tear_down(self):
        if self.save_dir is not None:
            shutil.rmtree(self.save_dir)
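run_tests.sh drives one pytest process per config, but a single case can also be exercised directly. A minimal sketch, assuming the repository root as the working directory and one GPU available:

    TEST_DATA_FILE=tests/e2e/vLLM/lm_eval_configs/fp8_dynamic_per_token.yaml \
    CADENCE=weekly \
    pytest -s tests/e2e/vLLM/test_lmeval.py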
13 changes: 6 additions & 7 deletions tests/e2e/vLLM/test_vllm.py
@@ -40,13 +40,12 @@ class TestvLLM:
     runs the model using vLLM, and then pushes the model to the hub for
     future use. Each test case is focused on a specific quantization type
     (e.g. W4A16 with grouped quantization, W4A16 with channel quantization).
-    To add a new test case, a new config has to be added to one of the folders
-    listed in the `CONFIGS` folder. If the test case is for a data type not listed
-    in `CONFIGS`, a new folder can be created and added to the list. The tests
-    run on a cadence defined by the `cadence` field. Each config defines the model
-    to quantize. Optionally, a dataset id and split can be provided for calibration.
-    Finally, all config files must list a scheme. The scheme can be a preset scheme
-    from https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py
+    To add a new test case, a new config has to be added to the `configs` folder.
+    The tests run on a cadence defined by the `cadence` field. Each config defines
+    the model to quantize. Optionally, a dataset id and split can be provided for
+    calibration. Finally, all config files must list a scheme. The scheme can be a
+    preset scheme from
+    https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py
     or another identifier which can be used for the particular test case. If a recipe
     is not provided, it is assumed that the scheme provided is a preset scheme and will
     be used for quantization. Otherwise, the recipe will always be used if given.
