Skip to content

Commit

Permalink
Try to resolve default int4 config for local models (#760)
Browse files Browse the repository at this point in the history
* try to resolve default int4 config for local models

* Update optimum/commands/export/openvino.py

* apply review comment

* add test case

* improve test

* update tests
  • Loading branch information
eaidova authored Jun 13, 2024
1 parent f20e5b8 commit 0a6075b
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 10 deletions.
38 changes: 28 additions & 10 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
"""Defines the command line for the export with OpenVINO."""

import json
import logging
import sys
from pathlib import Path
Expand Down Expand Up @@ -212,6 +213,32 @@ def run(self):
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig

def _get_default_int4_config(model_id_or_path, library_name):
    """Resolve the default 4-bit weight-compression config for a model.

    Resolution order:
      1. Direct hit: the given id/path is a key in ``_DEFAULT_4BIT_CONFIGS``.
      2. Local checkpoint: for a transformers-style model directory, read the
         original hub id recorded in ``config.json`` (``_name_or_path``) and
         look that up instead.
      3. Fallback: a generic int4 configuration.
    """
    if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
        return _DEFAULT_4BIT_CONFIGS[model_id_or_path]

    local_config_path = Path(model_id_or_path) / "config.json"
    # NOTE: substring check also matches "sentence_transformers"
    if "transformers" in library_name and local_config_path.exists():
        with local_config_path.open("r") as cfg_file:
            original_model_name = json.load(cfg_file).get("_name_or_path", "")
        if original_model_name in _DEFAULT_4BIT_CONFIGS:
            return _DEFAULT_4BIT_CONFIGS[original_model_name]

    return {
        "bits": 4,
        "ratio": 0.8,
        "sym": False,
        "group_size": None,
        "all_layers": None,
    }

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

if self.args.fp16:
logger.warning(
"`--fp16` option is deprecated and will be removed in a future version. Use `--weight-format` instead."
Expand Down Expand Up @@ -241,9 +268,8 @@ def run(self):
and self.args.num_samples is None
and self.args.awq is None
and self.args.sensitivity_metric is None
and self.args.model in _DEFAULT_4BIT_CONFIGS
):
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
quantization_config = _get_default_int4_config(self.args.model, library_name)
else:
quantization_config = {
"bits": 8 if is_int8 else 4,
Expand All @@ -265,14 +291,6 @@ def run(self):
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
ov_config = OVConfig(quantization_config=quantization_config)

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

if self.args.convert_tokenizer:
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")

Expand Down
34 changes: 34 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from tempfile import TemporaryDirectory

from parameterized import parameterized
from transformers import AutoModelForCausalLM
from utils_tests import (
_ARCHITECTURES_TO_EXPECTED_INT8,
MODEL_NAMES,
Expand All @@ -38,6 +39,7 @@
OVStableDiffusionPipeline,
OVStableDiffusionXLPipeline,
)
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available

Expand Down Expand Up @@ -219,6 +221,38 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
self.assertEqual(expected_int4, num_int4)
self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)

def test_exporters_cli_int4_with_local_model_and_default_config(self):
    """Export a local checkpoint whose config.json points at a hub model that
    has a predefined int4 config, and verify that config was applied.

    The CLI should resolve ``_name_or_path`` from the saved config.json and
    pick up the ``bigscience/bloomz-7b1`` default 4-bit configuration even
    though the model is loaded from a local directory.
    """
    with TemporaryDirectory() as tmpdir:
        pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"])
        # overload for matching with default configuration
        pt_model.config._name_or_path = "bigscience/bloomz-7b1"
        pt_model.save_pretrained(tmpdir)
        subprocess.run(
            f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}",
            shell=True,
            check=True,
        )

        model = OVModelForCausalLM.from_pretrained(tmpdir)
        rt_info = model.model.get_rt_info()
        self.assertTrue("nncf" in rt_info)
        self.assertTrue("weight_compression" in rt_info["nncf"])
        # Copy before mutating: pop()/item assignment below would otherwise
        # corrupt the shared module-level _DEFAULT_4BIT_CONFIGS entry and
        # leak into every other test running in this process.
        default_config = dict(_DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"])
        model_weight_compression_config = rt_info["nncf"]["weight_compression"]
        sym = default_config.pop("sym", False)
        bits = default_config.pop("bits", None)
        self.assertEqual(bits, 4)

        # NNCF encodes bits+sym as a single "mode" string in the rt_info
        mode = f'int{bits}_{"sym" if sym else "asym"}'
        default_config["mode"] = mode
        for key, value in default_config.items():
            self.assertTrue(key in model_weight_compression_config)
            self.assertEqual(
                model_weight_compression_config[key].value,
                str(value),
                f"Parameter {key} not matched with expected, {model_weight_compression_config[key].value} != {value}",
            )

def test_exporters_cli_help(self):
subprocess.run(
"optimum-cli export openvino --help",
Expand Down

0 comments on commit 0a6075b

Please sign in to comment.