From ca05db034cfb961cbf6efa8137b2c4a7749911db Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Wed, 29 May 2024 16:32:57 +0200
Subject: [PATCH] Bump transformers version (#724)

* Bump transformers version

* fix default ignored scope for model using sdpa by default

* fix quant ops test

* update setup

* add ops quant num gpt2

* fix expected ops quant in test

* update optimum version
---
 .../configs/swin-base-jpqd.json      |  2 -
 .../configs/bert-base-jpqd.json      |  2 -
 .../configs/bert-base-jpqd.json      |  2 -
 optimum/intel/openvino/trainer.py    |  2 -
 setup.py                             |  4 +-
 tests/openvino/test_modeling.py      |  2 +-
 tests/openvino/test_quantization.py  |  4 +-
 tests/openvino/test_training.py      | 38 ++++++++++---------
 8 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/examples/openvino/image-classification/configs/swin-base-jpqd.json b/examples/openvino/image-classification/configs/swin-base-jpqd.json
index 3f03c276aa..23b2fd3d84 100644
--- a/examples/openvino/image-classification/configs/swin-base-jpqd.json
+++ b/examples/openvino/image-classification/configs/swin-base-jpqd.json
@@ -36,8 +36,6 @@
             "ignored_scopes": [
                 "{re}.*__add___[0-1]",
                 "{re}.*layer_norm_0",
-                "{re}.*matmul_1",
-                "{re}.*__truediv__*"
             ]
         }
     ]
diff --git a/examples/openvino/question-answering/configs/bert-base-jpqd.json b/examples/openvino/question-answering/configs/bert-base-jpqd.json
index 425bd9f31b..342d327a34 100644
--- a/examples/openvino/question-answering/configs/bert-base-jpqd.json
+++ b/examples/openvino/question-answering/configs/bert-base-jpqd.json
@@ -36,8 +36,6 @@
             "ignored_scopes": [
                 "{re}.*__add___[0-1]",
                 "{re}.*layer_norm_0",
-                "{re}.*matmul_1",
-                "{re}.*__truediv__*"
             ]
         }
     ]
diff --git a/examples/openvino/text-classification/configs/bert-base-jpqd.json b/examples/openvino/text-classification/configs/bert-base-jpqd.json
index 25c8f28861..d177e4efd7 100644
--- a/examples/openvino/text-classification/configs/bert-base-jpqd.json
+++ b/examples/openvino/text-classification/configs/bert-base-jpqd.json
@@ -40,8 +40,6 @@
             "ignored_scopes": [
                 "{re}.*__add___[0-1]",
                 "{re}.*layer_norm_0",
-                "{re}.*matmul_1",
-                "{re}.*__truediv__*"
             ]
         }
     ]
diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py
index c8b29800fa..0a1f5209a4 100644
--- a/optimum/intel/openvino/trainer.py
+++ b/optimum/intel/openvino/trainer.py
@@ -153,8 +153,6 @@
         "{re}.*Embedding.*",
         "{re}.*add___.*",
         "{re}.*layer_norm_.*",
-        "{re}.*matmul_1",
-        "{re}.*__truediv__.*",
     ],
 }
diff --git a/setup.py b/setup.py
index 251ec61cdd..d00ce1dd92 100644
--- a/setup.py
+++ b/setup.py
@@ -28,8 +28,8 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
-    "transformers>=4.36.0,<4.41.0",
-    "optimum~=1.19",
+    "transformers>=4.36.0,<4.42.0",
+    "optimum~=1.20",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index cb5ac52ed7..1e18fb0663 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -1679,7 +1679,7 @@ def test_compare_output_attentions(self, model_arch):
         preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
         inputs = preprocessor(images=image, return_tensors="pt")
-        transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
+        transformers_model = AutoModelForImageClassification.from_pretrained(model_id, attn_implementation="eager")
         transformers_model.eval()
         with torch.no_grad():
             transformers_outputs = transformers_model(**inputs, output_attentions=True)
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index 09b395ea12..b7ed36d3e6 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -74,7 +74,7 @@ class OVQuantizerTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_TORCH_MODEL = (
-        (OVModelForSequenceClassification, "bert", 32, 35),
+        (OVModelForSequenceClassification, "bert", 22, 35),
         (OVModelForCausalLM, "gpt2", 41, 3),
     )
     SUPPORTED_ARCHITECTURES_OV_MODEL = (
@@ -665,7 +665,7 @@ def preprocess_function(examples, tokenizer):
 class OVTrainerTest(unittest.TestCase):
-    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 49, 38),)
+    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("distilbert-base-uncased", 67, 38),)
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_aware_training_quantization(self, model_name, expected_fake_quantize, expected_int8):
diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py
index c998d00d8b..89d644319c 100644
--- a/tests/openvino/test_training.py
+++ b/tests/openvino/test_training.py
@@ -322,7 +322,7 @@ def tearDown(self):
     "default_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss"],
     ),
@@ -330,14 +330,14 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
     "customized_quantization": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss"],
     ),
@@ -345,7 +345,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG,
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
     ),
@@ -365,7 +365,7 @@ def tearDown(self):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -376,7 +376,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -385,7 +385,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -397,7 +397,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -418,7 +418,7 @@ def tearDown(self):
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -429,7 +429,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss"],
@@ -438,7 +438,7 @@ def tearDown(self):
         model_id="hf-internal-testing/tiny-random-bert",
         teacher_model_id="hf-internal-testing/tiny-random-bert",
         nncf_compression_config=[DEFAULT_QUANTIZATION_CONFIG, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -450,7 +450,7 @@ def tearDown(self):
             CUSTOMIZED_QUANTIZATION_CONFIG,
             UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT,
         ],
-        expected_fake_quantize=34,
+        expected_fake_quantize=22,
         expected_int8=32,
         expected_binary_masks=60,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -553,7 +553,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG,
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
        expected_int8=28,
         compression_metrics=["compression_loss"],
     ),
@@ -572,7 +572,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -580,7 +580,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
     "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor(
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss"],
@@ -589,7 +589,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -598,7 +598,7 @@ def check_ovmodel_reshaping(self, ovmodel: OVModel):
         model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         teacher_model_id="yujiepan/tiny-random-swin-patch4-window7-224",
         nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG],
-        expected_fake_quantize=28,
+        expected_fake_quantize=36,
         expected_int8=28,
         expected_binary_masks=48,
         compression_metrics=["compression_loss", "distillation_loss", "task_loss"],
@@ -797,7 +797,9 @@ def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor):
         self.feature_extractor = AutoFeatureExtractor.from_pretrained(desc.model_id)
         self.tokenizer = self.feature_extractor
-        self.model = AutoModelForAudioClassification.from_pretrained(desc.model_id, num_labels=self.num_labels)
+        self.model = AutoModelForAudioClassification.from_pretrained(
+            desc.model_id, num_labels=self.num_labels, attn_implementation="eager"
+        )
         self.teacher_model = None
         if desc.teacher_model_id:
             self.teacher_model = AutoModelForAudioClassification.from_pretrained(