revert examples, rename arg
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
kylesayrs committed Feb 4, 2025
1 parent f87a78f commit 7b3f434
Showing 4 changed files with 5 additions and 4 deletions.
examples/multimodal_vision/phi3_vision_example.py (3 additions, 0 deletions)
@@ -3,6 +3,7 @@
 from transformers import AutoModelForCausalLM, AutoProcessor
 
 from llmcompressor.modifiers.quantization import GPTQModifier
+from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
 from llmcompressor.transformers import oneshot
 
 # Load model.
@@ -15,6 +16,7 @@
     _attn_implementation="eager",
 )
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+processor.chat_template = processor.tokenizer.chat_template
 
 # Oneshot arguments
 DATASET_ID = "lmms-lab/flickr30k"
@@ -66,6 +68,7 @@ def data_collator(batch):
 
 # Recipe
 recipe = [
+    SmoothQuantModifier(smoothing_strength=0.8),
     GPTQModifier(
         targets="Linear",
         scheme="W4A16",
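
For reference, a minimal sketch of how the restored recipe in phi3_vision_example.py reads after this commit. The GPTQModifier arguments beyond targets and scheme are truncated in the hunk above, so they are left as a placeholder comment rather than filled in:

from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier

# SmoothQuant migrates activation outliers into the weights before GPTQ
# quantizes the Linear layers to 4-bit weights with 16-bit activations.
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(
        targets="Linear",
        scheme="W4A16",
        # remaining arguments are truncated in this diff
    ),
]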
examples/quantization_w4a16/llama3_example.py (0 additions, 2 deletions)
@@ -22,7 +22,6 @@
 # Increasing the number of samples can improve accuracy.
 NUM_CALIBRATION_SAMPLES = 512
 MAX_SEQUENCE_LENGTH = 2048
-BATCH_SIZE = 8
 
 # Load dataset and preprocess.
 ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
@@ -65,7 +64,6 @@ def tokenize(sample):
     recipe=recipe,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
-    per_device_oneshot_batch_size=BATCH_SIZE,
 )
 
 # Confirm generations of the quantized model look sane.
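
After the revert, the example's oneshot call no longer receives a batch size, so calibration falls back to the default of 1 set in training_args.py below. A sketch of the resulting call, assuming the usual model and dataset variables from earlier in the example (they are not shown in this hunk):

oneshot(
    model=model,  # assumed: the model loaded earlier in the example
    dataset=ds,   # assumed: the preprocessed calibration dataset
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)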
src/llmcompressor/transformers/finetune/runner.py (1 addition, 1 deletion)
@@ -144,7 +144,7 @@ def one_shot(self, stage: Optional[str] = None):
         calib_data = format_calibration_data(
             tokenized_dataset=self.get_dataset_split("calibration"),
             num_calibration_samples=self._data_args.num_calibration_samples,
-            batch_size=self._training_args.per_device_oneshot_batch_size,
+            batch_size=self._training_args.oneshot_batch_size,
             do_shuffle=self._data_args.shuffle_calibration_samples,
             collate_fn=self._data_args.data_collator,
             processor=self.processor,
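
The renamed argument feeds the calibration DataLoader's batch size. The body of format_calibration_data is not part of this diff; the following is only an illustrative guess at what such a helper does with these arguments, not the actual implementation:

from torch.utils.data import DataLoader

def format_calibration_data(tokenized_dataset, num_calibration_samples,
                            batch_size=1, do_shuffle=True, collate_fn=None,
                            processor=None):
    # Illustrative sketch: take the calibration subset and wrap it in a
    # DataLoader whose batch size comes from oneshot_batch_size.
    if do_shuffle:
        tokenized_dataset = tokenized_dataset.shuffle(seed=42)
    samples = tokenized_dataset.select(range(num_calibration_samples))
    # processor is unused in this sketch
    return DataLoader(samples, batch_size=batch_size, collate_fn=collate_fn)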
src/llmcompressor/transformers/finetune/training_args.py (1 addition, 1 deletion)
@@ -32,7 +32,7 @@ class TrainingArguments(HFTrainingArgs):
             )
         },
     )
-    per_device_oneshot_batch_size: int = field(
+    oneshot_batch_size: int = field(
         default=1,
         metadata={
             "help": "The batch size per GPU/XPU/TPU/MPS/NPU core/CPU for oneshot"
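
With the rename, callers simply set oneshot_batch_size on the dataclass. A small usage sketch, assuming the class is constructed directly from the module path shown in the file header above:

from llmcompressor.transformers.finetune.training_args import TrainingArguments

# oneshot_batch_size defaults to 1; raise it to push several calibration
# samples through each forward pass (the old name was
# per_device_oneshot_batch_size).
args = TrainingArguments(output_dir="./output", oneshot_batch_size=4)
print(args.oneshot_batch_size)  # 4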
