From ddda03b0c7cabf1d3e6c9048da22aa96ac295d32 Mon Sep 17 00:00:00 2001
From: Sara Adkins
Date: Fri, 12 Apr 2024 20:11:52 +0000
Subject: [PATCH] add examples dir

---
 .../llama_1.1b/ex_sparseml_quantization.py    | 41 +++++++++++++++++++
 examples/llama_1.1b/example_quant_config.json | 39 ++++++++++++++++++
 examples/llama_1.1b/example_quant_recipe.yaml |  0
 makefile                                      |  2 +-
 4 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 examples/llama_1.1b/ex_sparseml_quantization.py
 create mode 100644 examples/llama_1.1b/example_quant_config.json
 create mode 100644 examples/llama_1.1b/example_quant_recipe.yaml

diff --git a/examples/llama_1.1b/ex_sparseml_quantization.py b/examples/llama_1.1b/ex_sparseml_quantization.py
new file mode 100644
index 00000000..3c66f5e8
--- /dev/null
+++ b/examples/llama_1.1b/ex_sparseml_quantization.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sparseml.transformers import oneshot, SparseAutoModelForCausalLM
+
+dataset_name = "open_platypus"
+overwrite_output_dir = True
+splits = {"calibration": "train"}
+seed = 42
+output_dir = "./llama_1.1b_quant_mod_only"
+num_calibration_samples = 1024
+recipe = "example_quant_recipe.yaml"
+model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+max_seq_length = 1024
+pad_to_max_length = False
+
+model = SparseAutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0")
+
+oneshot(
+    model=model,
+    dataset=dataset_name,
+    output_dir=output_dir,
+    overwrite_output_dir=overwrite_output_dir,
+    splits=splits,
+    max_seq_length=max_seq_length,
+    seed=seed,
+    num_calibration_samples=num_calibration_samples,
+    recipe=recipe,
+    pad_to_max_length=pad_to_max_length,
+)
\ No newline at end of file
diff --git a/examples/llama_1.1b/example_quant_config.json b/examples/llama_1.1b/example_quant_config.json
new file mode 100644
index 00000000..65d15740
--- /dev/null
+++ b/examples/llama_1.1b/example_quant_config.json
@@ -0,0 +1,39 @@
+{
+    "quant_method": "sparseml",
+    "format": "fakequant",
+    "quantization_status": "frozen",
+    "global_compression_ratio": null,
+    "config_groups": {
+        "group_1": {
+            "weights": {
+                "num_bits": 8,
+                "type": "int",
+                "symmetric": true,
+                "strategy": "tensor"
+            },
+            "input_activations": {
+                "num_bits": 8,
+                "type": "int",
+                "symmetric": true,
+                "strategy": "tensor"
+            },
+            "targets": ["Linear"]
+        },
+        "group_2": {
+            "weights": {
+                "num_bits": 8,
+                "type": "int",
+                "symmetric": false,
+                "strategy": "tensor"
+            },
+            "input_activations": null,
+            "targets": ["Embedding"]
+        }
+    },
+    "ignore": [
+        "LlamaRotaryEmbedding", "LlamaRMSNorm", "SiLUActivation",
+        "model.layers.1.mlp.down_proj", "MatMulLeftInput_QK", "MatMulRightInput_QK",
+        "MatMulOutput_QK", "MatMulLeftInput_PV", "MatMulRightInput_PV",
+        "MatMulOutput_PV"
+    ]
+}
\ No newline at end of file
diff --git a/examples/llama_1.1b/example_quant_recipe.yaml b/examples/llama_1.1b/example_quant_recipe.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/makefile b/makefile
index 02872127..435a37b9 100644
--- a/makefile
+++ b/makefile
@@ -1,6 +1,6 @@
 BUILDDIR := $(PWD)
 PYCHECKDIRS := src tests
-PYCHECKGLOBS := 'src/**/*.py' 'tests/**/*.py' 'utils/**/*.py' setup.py
+PYCHECKGLOBS := 'src/**/*.py' 'tests/**/*.py' 'utils/**/*.py' 'examples/**/*.py' setup.py
 # run checks on all files for the repo
 quality:
 	@echo "Running copyright checks";
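
Note on the empty recipe: example_quant_recipe.yaml is added as an empty file (blob e69de29b) even though ex_sparseml_quantization.py passes it to oneshot() as the recipe, so the script as committed would apply no quantization modifiers. A minimal sketch of a recipe that mirrors the "group_1" settings in example_quant_config.json might look like the following; the stage name, the QuantizationModifier entry, and the exact field layout are assumptions based on sparseml's YAML recipe conventions, not part of this patch:

    # hypothetical recipe sketch, not taken from the patch
    test_stage:
      quant_modifiers:
        QuantizationModifier:
          ignore: ["LlamaRotaryEmbedding", "LlamaRMSNorm", "SiLUActivation"]
          config_groups:
            group_1:
              targets: ["Linear"]
              weights:
                num_bits: 8
                type: "int"
                symmetric: true
                strategy: "tensor"
              input_activations:
                num_bits: 8
                type: "int"
                symmetric: true
                strategy: "tensor"

With a populated recipe in place, the example can be run as "python examples/llama_1.1b/ex_sparseml_quantization.py", writing the calibrated, quantized model to ./llama_1.1b_quant_mod_only.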