Update default NNCF configurations (#824)
* Add configs from 143530

* Fix wrong AWQ option

* Apply comment

* Add test

* Add missing configuration

* Apply comment
KodiaqQ authored Jul 17, 2024
1 parent cb2f2ec commit 31f49a2
Showing 2 changed files with 55 additions and 17 deletions.
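
The key fix in this commit: the defaults previously enabled AWQ through a bare "awq": True entry, which the commit message calls the "wrong AWQ option"; the supported switch is quant_method. Below is a minimal sketch of the corrected form (not part of the diff), mirroring the "HuggingFaceH4/zephyr-7b-beta" entry and assuming the constructor keywords exposed by optimum-intel at the time of this commit:

from optimum.intel import OVWeightQuantizationConfig
from optimum.intel.openvino.configuration import OVQuantizationMethod

# Corrected form: select AWQ via quant_method rather than a bare "awq" flag.
# Mirrors the "HuggingFaceH4/zephyr-7b-beta" entry in _DEFAULT_4BIT_CONFIGS.
awq_config = OVWeightQuantizationConfig(
    bits=4,
    sym=True,
    group_size=128,
    ratio=0.8,  # share of weights quantized to 4 bits
    dataset="wikitext2",  # calibration data used by AWQ
    quant_method=OVQuantizationMethod.AWQ,
)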
optimum/intel/openvino/configuration.py (33 additions & 16 deletions)
@@ -32,19 +32,25 @@
 logger = logging.getLogger(__name__)
 
 
+class OVQuantizationMethod(str, Enum):
+    DEFAULT = "default"
+    HYBRID = "hybrid"
+    AWQ = "awq"
+
+
 _DEFAULT_4BIT_CONFIGS = {
-    "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
+    "databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "scale_estimation": True},
     "EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64},
     "facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8},
     "bigscience/bloomz-7b1": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.6},
     "togethercomputer/RedPajama-INCITE-7B-Instruct": {"bits": 4, "sym": False, "group_size": 128},
     "HuggingFaceH4/zephyr-7b-beta": {
         "bits": 4,
         "sym": True,
         "group_size": 128,
         "ratio": 0.8,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
     "meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
     "meta-llama/Llama-2-7b-chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
@@ -55,21 +61,21 @@
         "group_size": 64,
         "ratio": 0.8,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
     "stabilityai/stablelm-zephyr-3b": {
         "bits": 4,
         "sym": False,
         "group_size": 128,
         "ratio": 1.0,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
     "stabilityai/stable-code-3b": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8},
     "pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
     "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72},
     "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
-    "openlm-research/open_llama_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
+    "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True},
     "openlm-research/open_llama_3b_v2": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
     "tiiuae/falcon-7b-instruct": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
     "psmathur/orca_mini_3b": {
@@ -78,19 +84,24 @@
         "group_size": 64,
         "all_layers": True,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
     "bigscience/bloomz-560m": {
         "bits": 4,
         "sym": True,
         "group_size": 64,
         "ratio": 0.8,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
     "mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
     "facebook/opt-2.7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7},
-    "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
+    "togethercomputer/RedPajama-INCITE-Chat-3B-v1": {
+        "bits": 4,
+        "sym": False,
+        "group_size": 128,
+        "scale_estimation": True,
+    },
     "lmsys/vicuna-7b-v1.5": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0},
     "stabilityai/stablelm-tuned-alpha-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
     "mistralai/Mistral-7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.9},
@@ -100,8 +111,20 @@
         "group_size": 128,
         "ratio": 0.8,
         "dataset": "wikitext2",
-        "awq": True,
+        "quant_method": OVQuantizationMethod.AWQ,
     },
+    "openai-community/gpt2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.5, "scale_estimation": True},
+    "lmsys/longchat-7b-16k": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
+    "bigcode/starcoder2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
+    "stabilityai/stablelm-tuned-alpha-7b": {
+        "bits": 4,
+        "sym": False,
+        "group_size": 128,
+        "ratio": 0.6,
+        "scale_estimation": True,
+    },
+    "microsoft/phi-2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
 }
 
 _DEFAULT_4BIT_CONFIG = {
@@ -113,12 +136,6 @@
 }
 
 
-class OVQuantizationMethod(str, Enum):
-    DEFAULT = "default"
-    HYBRID = "hybrid"
-    AWQ = "awq"
-
-
 @dataclass
 class OVQuantizationConfigBase(QuantizationConfigMixin):
     """
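
For context on how these entries are consumed (an illustration, not part of the commit): each value in _DEFAULT_4BIT_CONFIGS is a dict of keyword arguments for OVWeightQuantizationConfig, keyed by Hugging Face model ID. A minimal sketch, assuming the import paths used in the test file below:

from optimum.intel import OVWeightQuantizationConfig
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS

# Build an explicit config from the updated dolly-v2-3b default entry.
kwargs = _DEFAULT_4BIT_CONFIGS["databricks/dolly-v2-3b"]
config = OVWeightQuantizationConfig(**kwargs)
print(config.scale_estimation)  # True, per this commit's update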
tests/openvino/test_quantization.py (22 additions & 1 deletion)
@@ -61,7 +61,13 @@
     OVWeightQuantizationConfig,
     OVDynamicQuantizationConfig,
 )
-from optimum.intel.openvino.configuration import OVQuantizationMethod, OVQuantizationConfigBase
+from optimum.intel.openvino.configuration import (
+    OVQuantizationMethod,
+    OVQuantizationConfigBase,
+    _DEFAULT_4BIT_CONFIGS,
+    _DEFAULT_4BIT_CONFIG,
+)
+from copy import deepcopy
 
 from optimum.intel.openvino.quantization import InferRequestWrapper
 from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
@@ -820,6 +826,13 @@ class OVQuantizationConfigTest(unittest.TestCase):
         (dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
     )
 
+    def get_default_configurations() -> dict:
+        default_configurations = deepcopy(_DEFAULT_4BIT_CONFIGS)
+        default_configurations.update({"default": _DEFAULT_4BIT_CONFIG})
+        return default_configurations
+
+    DEFAULT_CONFIGURATIONS = get_default_configurations()
+
     @parameterized.expand(QUANTIZATION_CONFIGS)
     def test_config_serialization(self, quantization_config: OVQuantizationConfigBase):
         ov_config = OVConfig(quantization_config=quantization_config)
@@ -849,6 +862,14 @@ def test_config_from_dict(self, quantization_config: dict, config_type: type, wa
             if hasattr(ov_config.quantization_config, k):
                 self.assertEqual(getattr(ov_config.quantization_config, k), v)
 
+    @parameterized.expand(DEFAULT_CONFIGURATIONS)
+    def test_named_default_configurations(self, config_id: str):
+        custom_configuration = self.DEFAULT_CONFIGURATIONS[config_id]
+        prepared_config = OVModelForCausalLM._prepare_weight_quantization_config(custom_configuration)
+        for field_name, reference_value in custom_configuration.items():
+            value = prepared_config.__getattribute__(field_name)
+            self.assertEqual(value, reference_value)
+
 
 class InferRequestWrapperTest(unittest.TestCase):
     MODEL_ID = ("openai/whisper-tiny.en",)
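
A quick spot-check equivalent to a single case of the new parameterized test, assuming the same classmethod-style call on OVModelForCausalLM that the test relies on (note it is a private helper, so this is a sketch rather than supported API usage):

from optimum.intel import OVModelForCausalLM
from optimum.intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVQuantizationMethod

# Prepare the zephyr default and confirm the AWQ method survives preparation.
cfg = _DEFAULT_4BIT_CONFIGS["HuggingFaceH4/zephyr-7b-beta"]
prepared = OVModelForCausalLM._prepare_weight_quantization_config(cfg)
assert prepared.quant_method == OVQuantizationMethod.AWQ  # str Enum, also equals "awq"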
