Skip to content

Commit

Permalink
Add sparsity testing with vLLM; add an option to skip saving in compressed format
Browse files Browse the repository at this point in the history
  • Loading branch information
dsikka committed Jan 14, 2025
1 parent 28fff75 commit 2d56538
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 2 deletions.
7 changes: 7 additions & 0 deletions tests/e2e/vLLM/configs/sparse2of4_fp8_dynamic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# E2E vLLM test config: 2:4 semi-structured sparsity combined with FP8 dynamic
# quantization, applied to TinyLlama via the referenced two-stage recipe.
cadence: "nightly"  # runs on the nightly CI schedule
test_type: "regression"
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
# Two-stage recipe: SparseGPT 2:4 pruning, then FP8_DYNAMIC quantization.
recipe: tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4_fp8_dynamic.yaml
scheme: sparse2of4_fp8_dynamic  # label used for the uploaded model / test id
dataset_id: HuggingFaceH4/ultrachat_200k  # calibration dataset
dataset_split: train_sft
8 changes: 8 additions & 0 deletions tests/e2e/vLLM/configs/sparse_24.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# E2E vLLM test config: 2:4 semi-structured sparsity only (no quantization).
cadence: "nightly"  # runs on the nightly CI schedule
test_type: "regression"
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
recipe: tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml
scheme: sparse2of4_only  # label used for the uploaded model / test id
dataset_id: HuggingFaceH4/ultrachat_200k  # calibration dataset
dataset_split: train_sft
# Save dense (uncompressed) weights — exercises the new save_compressed flag
# added to test_vllm.py in this same change.
save_compressed: False
6 changes: 6 additions & 0 deletions tests/e2e/vLLM/recipes/Sparse_2of4/recipe_sparse_2of4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Recipe: one-shot 2:4 (N:M) semi-structured pruning with SparseGPT.
# NOTE: the scraped source had all structural indentation flattened to
# column 0, which is invalid YAML for this nested mapping; the standard
# llm-compressor stage -> modifiers -> modifier nesting is restored here.
sparsity_stage:
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5             # 2-of-4 pattern implies exactly 50% sparsity
      mask_structure: "2:4"     # prune 2 weights out of every contiguous 4
      sequential_update: false  # prune all layers from one calibration pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Recipe: two one-shot stages —
#   1) SparseGPT 2:4 semi-structured pruning;
#   2) FP8 dynamic quantization, with a ConstantPruningModifier that pins the
#      2:4 masks on the attention/MLP projection weights so quantization does
#      not destroy the sparsity pattern.
# NOTE: the scraped source had all structural indentation flattened to
# column 0 (invalid YAML); the standard llm-compressor nesting is restored.
sparsity_stage:
  run_type: oneshot
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5             # 2-of-4 pattern implies exactly 50% sparsity
      mask_structure: "2:4"     # prune 2 weights out of every contiguous 4
      sequential_update: false  # prune all layers from one calibration pass
quantization_stage:
  run_type: oneshot
  quantization_modifiers:
    # Freeze the pruning masks produced in the sparsity stage (start: 0 keeps
    # them constant from the beginning of this stage).
    ConstantPruningModifier:
      targets: [
        're:.*q_proj.weight',
        're:.*k_proj.weight',
        're:.*v_proj.weight',
        're:.*o_proj.weight',
        're:.*gate_proj.weight',
        're:.*up_proj.weight',
        're:.*down_proj.weight',
      ]
      start: 0
    QuantizationModifier:
      targets: ["Linear"]       # quantize every Linear layer ...
      ignore: ["lm_head"]       # ... except the output head
      scheme: "FP8_DYNAMIC"     # FP8 weights, dynamic per-token activations
11 changes: 9 additions & 2 deletions tests/e2e/vLLM/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
from tests.examples.utils import requires_gpu_count

"""
try:
from vllm import LLM, SamplingParams
vllm_installed = True
except ImportError:
vllm_installed = False
logger.warning("vllm is not installed. This test will be skipped")
"""

HF_MODEL_HUB_NAME = "nm-testing"
TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")
Expand All @@ -41,7 +43,7 @@ def record_config_file(record_testsuite_property: Callable[[str, object], None])
# Will run each test case in its own process through run_tests.sh
# emulating vLLM CI testing
@requires_gpu_count(1)
@pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
# @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
class TestvLLM:
"""
The following test quantizes a model using a preset scheme or recipe,
Expand Down Expand Up @@ -73,6 +75,7 @@ def set_up(self):
self.recipe = eval_config.get("recipe")
self.quant_type = eval_config.get("quant_type")
self.save_dir = eval_config.get("save_dir")
self.save_compressed = eval_config.get("save_compressed", True)

logger.info("========== RUNNING ==============")
logger.info(self.scheme)
Expand Down Expand Up @@ -112,7 +115,9 @@ def test_vllm(self):
self._check_session_contains_recipe()

logger.info("================= SAVING TO DISK ======================")
oneshot_model.save_pretrained(self.save_dir)
oneshot_model.save_pretrained(
self.save_dir, save_compressed=self.save_compressed
)
tokenizer.save_pretrained(self.save_dir)
recipe_path = os.path.join(self.save_dir, "recipe.yaml")

Expand All @@ -134,6 +139,7 @@ def test_vllm(self):
folder_path=self.save_dir,
)

"""
logger.info("================= RUNNING vLLM =========================")
sampling_params = SamplingParams(temperature=0.80, top_p=0.95)
Expand All @@ -156,6 +162,7 @@ def test_vllm(self):
logger.info(generated_text)
self.tear_down()
"""

def tear_down(self):
if self.save_dir is not None:
Expand Down

0 comments on commit 2d56538

Please sign in to comment.