Skip to content

Commit

Permalink
Try to resolve default int4 config for local models (#760)
Browse files Browse the repository at this point in the history
* try to resolve default int4 config for local models

* Update optimum/commands/export/openvino.py

* apply review comment

* add test case

* improve test

* update tests
  • Loading branch information
eaidova authored Jun 13, 2024
1 parent f20e5b8 commit 0a6075b
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 10 deletions.
38 changes: 28 additions & 10 deletions optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
"""Defines the command line for the export with OpenVINO."""

import json
import logging
import sys
from pathlib import Path
Expand Down Expand Up @@ -212,6 +213,32 @@ def run(self):
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig

def _get_default_int4_config(model_id_or_path, library_name):
    """Resolve the default 4-bit weight-compression config for a model.

    Resolution order:
      1. Direct hit: the given id/path is a key in ``_DEFAULT_4BIT_CONFIGS``.
      2. Local checkpoint: for a transformers-style model directory, read the
         original hub id recorded in ``config.json`` (``_name_or_path``) and
         look that up instead.
      3. Fallback: a generic int4 configuration.
    """
    if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
        return _DEFAULT_4BIT_CONFIGS[model_id_or_path]

    local_config_path = Path(model_id_or_path) / "config.json"
    # NOTE: substring check also matches "sentence_transformers"
    if "transformers" in library_name and local_config_path.exists():
        with local_config_path.open("r") as cfg_file:
            original_model_name = json.load(cfg_file).get("_name_or_path", "")
        if original_model_name in _DEFAULT_4BIT_CONFIGS:
            return _DEFAULT_4BIT_CONFIGS[original_model_name]

    return {
        "bits": 4,
        "ratio": 0.8,
        "sym": False,
        "group_size": None,
        "all_layers": None,
    }

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

if self.args.fp16:
logger.warning(
"`--fp16` option is deprecated and will be removed in a future version. Use `--weight-format` instead."
Expand Down Expand Up @@ -241,9 +268,8 @@ def run(self):
and self.args.num_samples is None
and self.args.awq is None
and self.args.sensitivity_metric is None
and self.args.model in _DEFAULT_4BIT_CONFIGS
):
quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
quantization_config = _get_default_int4_config(self.args.model, library_name)
else:
quantization_config = {
"bits": 8 if is_int8 else 4,
Expand All @@ -265,14 +291,6 @@ def run(self):
quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
ov_config = OVConfig(quantization_config=quantization_config)

library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
if library_name == "sentence_transformers" and self.args.library is None:
logger.warning(
"Library name is not specified. There are multiple possible variants: `sentence_transformers`, `transformers`."
"`transformers` will be selected. If you want to load your model with the `sentence-transformers` library instead, please set --library sentence_transformers"
)
library_name = "transformers"

if self.args.convert_tokenizer:
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")

Expand Down
34 changes: 34 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from tempfile import TemporaryDirectory

from parameterized import parameterized
from transformers import AutoModelForCausalLM
from utils_tests import (
_ARCHITECTURES_TO_EXPECTED_INT8,
MODEL_NAMES,
Expand All @@ -38,6 +39,7 @@
OVStableDiffusionPipeline,
OVStableDiffusionXLPipeline,
)
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available

Expand Down Expand Up @@ -219,6 +221,38 @@ def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expec
self.assertEqual(expected_int4, num_int4)
self.assertTrue("--awq" not in option or b"Applying AWQ" in result.stdout)

def test_exporters_cli_int4_with_local_model_and_default_config(self):
    """Export a local checkpoint whose config.json points at a hub model that
    has a predefined int4 config, and verify that config was applied.

    The CLI should resolve ``_name_or_path`` from the saved config.json and
    pick up the ``bigscience/bloomz-7b1`` default 4-bit configuration even
    though the model is loaded from a local directory.
    """
    with TemporaryDirectory() as tmpdir:
        pt_model = AutoModelForCausalLM.from_pretrained(MODEL_NAMES["bloom"])
        # overload for matching with default configuration
        pt_model.config._name_or_path = "bigscience/bloomz-7b1"
        pt_model.save_pretrained(tmpdir)
        subprocess.run(
            f"optimum-cli export openvino --model {tmpdir} --task text-generation-with-past --weight-format int4 {tmpdir}",
            shell=True,
            check=True,
        )

        model = OVModelForCausalLM.from_pretrained(tmpdir)
        rt_info = model.model.get_rt_info()
        self.assertTrue("nncf" in rt_info)
        self.assertTrue("weight_compression" in rt_info["nncf"])
        # Copy before mutating: pop()/item assignment below would otherwise
        # corrupt the shared module-level _DEFAULT_4BIT_CONFIGS entry and
        # leak into every other test running in this process.
        default_config = dict(_DEFAULT_4BIT_CONFIGS["bigscience/bloomz-7b1"])
        model_weight_compression_config = rt_info["nncf"]["weight_compression"]
        sym = default_config.pop("sym", False)
        bits = default_config.pop("bits", None)
        self.assertEqual(bits, 4)

        # NNCF encodes bits+sym as a single "mode" string in the rt_info
        mode = f'int{bits}_{"sym" if sym else "asym"}'
        default_config["mode"] = mode
        for key, value in default_config.items():
            self.assertTrue(key in model_weight_compression_config)
            self.assertEqual(
                model_weight_compression_config[key].value,
                str(value),
                f"Parameter {key} not matched with expected, {model_weight_compression_config[key].value} != {value}",
            )

def test_exporters_cli_help(self):
subprocess.run(
"optimum-cli export openvino --help",
Expand Down

0 comments on commit 0a6075b

Please sign in to comment.