import OVQuantizer only if it is required in VLM (#1048)
* import OVQuantizer only if it is required in VLM

* do not use TasksManager for getting task constant

* avoid import onnx in modeling_base and stateful

* reduce onnx dependency
eaidova authored Dec 5, 2024
1 parent 958eb6e · commit eacf098
Showing 6 changed files with 24 additions and 20 deletions.
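
All six files apply the same deferred-import pattern: a heavy dependency is moved out of module scope and imported inside the one function that needs it, so importing the OpenVINO modeling code no longer drags in onnx, TasksManager, or the quantization stack at import time. A minimal, self-contained sketch of the pattern; the function and the stand-in dependency are illustrative, not code from this commit:

```python
def convert(path: str) -> str:
    # Deferred import: the dependency loads only when convert() runs, so
    # importing this module stays cheap and cannot fail on a missing
    # optional dependency. `json` stands in for something heavy like onnx.
    import json

    with open(path) as f:
        return json.dumps(json.load(f))
```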
optimum/exporters/openvino/__main__.py: 1 addition, 1 deletion
```diff
@@ -29,7 +29,6 @@
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
-from optimum.exporters.openvino.convert import export_from_model
 from optimum.intel.utils.import_utils import (
     is_nncf_available,
     is_openvino_tokenizers_available,
@@ -185,6 +184,7 @@ def main_export(
     >>> main_export("gpt2", output="gpt2_ov/")
     ```
     """
+    from optimum.exporters.openvino.convert import export_from_model
 
     if use_auth_token is not None:
         warnings.warn(
```
optimum/exporters/openvino/convert.py: 14 additions, 10 deletions
```diff
@@ -20,18 +20,13 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
-import onnx
 from transformers.generation import GenerationMixin
 from transformers.utils import is_tf_available, is_torch_available
 
 from openvino.runtime import Model, save_model
 from openvino.runtime.exceptions import OVTypeError
 from openvino.tools.ovc import convert_model
 from optimum.exporters import TasksManager
-from optimum.exporters.onnx.base import OnnxConfig
-from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
-from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
-from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
 from optimum.exporters.utils import (
     _get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
 )
@@ -89,6 +84,7 @@
 
 
 if TYPE_CHECKING:
+    from optimum.exporters.onnx.base import OnnxConfig
     from optimum.intel.openvino.configuration import OVConfig
 
 
@@ -115,7 +111,7 @@ def _save_model(
     path: str,
     ov_config: Optional["OVConfig"] = None,
     library_name: Optional[str] = None,
-    config: OnnxConfig = None,
+    config: "OnnxConfig" = None,
 ):
     compress_to_fp16 = ov_config is not None and ov_config.dtype == "fp16"
     model = _add_version_info_to_model(model, library_name)
@@ -129,7 +125,7 @@
 
 def export(
     model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"],
-    config: OnnxConfig,
+    config: "OnnxConfig",
     output: Path,
     opset: Optional[int] = None,
     device: str = "cpu",
@@ -212,7 +208,7 @@ def export(
 
 def export_tensorflow(
     model: Union["PreTrainedModel", "ModelMixin"],
-    config: OnnxConfig,
+    config: "OnnxConfig",
     opset: int,
     output: Path,
     ov_config: Optional["OVConfig"] = None,
@@ -232,6 +228,8 @@ def export_tensorflow(
         output_names: list of output names from ONNX configuration
         bool: True if the model was exported successfully.
     """
+    from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
+
     onnx_path = Path(output).with_suffix(".onnx")
     input_names, output_names = export_tensorflow_onnx(model, config, opset, onnx_path)
     ov_model = convert_model(str(onnx_path))
@@ -252,7 +250,7 @@ def export_tensorflow(
 
 def export_pytorch_via_onnx(
     model: Union["PreTrainedModel", "ModelMixin"],
-    config: OnnxConfig,
+    config: "OnnxConfig",
     opset: int,
     output: Path,
     device: str = "cpu",
@@ -289,6 +287,8 @@ def export_pytorch_via_onnx(
     """
     import torch
 
+    from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
+
     output = Path(output)
     orig_torch_onnx_export = torch.onnx.export
     torch.onnx.export = functools.partial(orig_torch_onnx_export, do_constant_folding=False)
@@ -317,7 +317,7 @@ def export_pytorch_via_onnx(
 
 def export_pytorch(
     model: Union["PreTrainedModel", "ModelMixin"],
-    config: OnnxConfig,
+    config: "OnnxConfig",
     opset: int,
     output: Path,
     device: str = "cpu",
@@ -359,6 +359,8 @@ def export_pytorch(
     import torch
     from torch.utils._pytree import tree_map
 
+    from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
+
     logger.info(f"Using framework PyTorch: {torch.__version__}")
     output = Path(output)
 
@@ -878,6 +880,8 @@ def _add_version_info_to_model(model: Model, library_name: Optional[str] = None)
             model.set_rt_info(_nncf_version, ["optimum", "nncf_version"])
         input_model = rt_info["conversion_parameters"].get("input_model", None)
         if input_model is not None and "onnx" in input_model.value:
+            import onnx
+
             model.set_rt_info(onnx.__version__, ["optimum", "onnx_version"])
 
     except Exception:
```
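
The annotation edits above complement the deferred imports: `OnnxConfig` moves under `if TYPE_CHECKING:`, and each annotation becomes the string `"OnnxConfig"`, which type checkers resolve but the interpreter never evaluates. A self-contained sketch of the idiom, with a stdlib class standing in for `OnnxConfig`:

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Visible to type checkers only; never imported at runtime.
    from decimal import Decimal  # stand-in for OnnxConfig


def scale(value: float, factor: Optional["Decimal"] = None) -> float:
    # The quoted annotation is never evaluated at runtime, so `decimal`
    # is not imported unless the caller already imported it themselves.
    return value if factor is None else float(factor) * value
```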
optimum/exporters/openvino/stateful.py: 2 additions, 1 deletion
```diff
@@ -20,7 +20,6 @@
 
 import openvino as ov
 from openvino.runtime import opset13
-from optimum.exporters import TasksManager
 from optimum.intel.utils.import_utils import _openvino_version, is_openvino_version, is_transformers_version
 
 from .utils import MULTI_MODAL_TEXT_GENERATION_MODELS
@@ -192,6 +191,8 @@ def ensure_stateful_is_available(warn=True):
 
 
 def ensure_export_task_support_stateful(task: str):
+    from optimum.exporters import TasksManager
+
     task = TasksManager.map_from_synonym(task)
     return task in ["text-generation-with-past"]
```
optimum/intel/openvino/modeling.py: 3 additions, 5 deletions
```diff
@@ -50,8 +50,6 @@
     XVectorOutput,
 )
 
-from optimum.exporters import TasksManager
-
 from ..utils.import_utils import is_timm_available, is_timm_version
 from .modeling_base import OVBaseModel
 from .utils import _is_timm_ov_dir
@@ -695,7 +693,7 @@ class OVModelForCTC(OVModel):
     """
 
     auto_model_class = AutoModelForCTC
-    export_feature = TasksManager.infer_task_from_model(auto_model_class)
+    export_feature = "automatic-speech-recognition"
 
     @add_start_docstrings_to_model_forward(
         AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -775,7 +773,7 @@ class OVModelForAudioXVector(OVModel):
     """
 
     auto_model_class = AutoModelForAudioXVector
-    export_feature = TasksManager.infer_task_from_model(auto_model_class)
+    export_feature = "audio-xvector"
 
     @add_start_docstrings_to_model_forward(
         AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
@@ -851,7 +849,7 @@ class OVModelForAudioFrameClassification(OVModel):
     """
 
     auto_model_class = AutoModelForAudioFrameClassification
-    export_feature = TasksManager.infer_task_from_model(auto_model_class)
+    export_feature = "audio-frame-classification"
 
     @add_start_docstrings_to_model_forward(
         AUDIO_INPUTS_DOCSTRING.format("batch_size, sequence_length")
```
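
These three edits replace an import-time `TasksManager.infer_task_from_model` call with the string it would have returned. To double-check that the hardcoded constants stay in sync with `TasksManager`, an assertion along these lines should work (assuming `optimum` and `transformers` are installed; the snippet is illustrative, not part of the commit):

```python
from optimum.exporters import TasksManager
from transformers import (
    AutoModelForAudioFrameClassification,
    AutoModelForAudioXVector,
    AutoModelForCTC,
)

# Each hardcoded export_feature should match the task TasksManager infers.
expected = {
    AutoModelForCTC: "automatic-speech-recognition",
    AutoModelForAudioXVector: "audio-xvector",
    AutoModelForAudioFrameClassification: "audio-frame-classification",
}
for auto_class, task in expected.items():
    assert TasksManager.infer_task_from_model(auto_class) == task, auto_class
```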
optimum/intel/openvino/modeling_base.py: 2 additions, 2 deletions
```diff
@@ -30,7 +30,7 @@
 from transformers.generation import GenerationMixin
 from transformers.utils import is_offline_mode
 
-from optimum.exporters.onnx import OnnxConfig
+from optimum.exporters.base import ExportConfig
 from optimum.modeling_base import FROM_PRETRAINED_START_DOCSTRING, OptimizedModel
 
 from ...exporters.openvino import export, main_export
@@ -623,7 +623,7 @@ def _to_load(
         cls,
         model,
         config: PretrainedConfig,
-        onnx_config: OnnxConfig,
+        onnx_config: ExportConfig,
         token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
```
optimum/intel/openvino/modeling_visual_language.py: 2 additions, 1 deletion
```diff
@@ -26,7 +26,6 @@
 from ...exporters.openvino import main_export
 from ...exporters.openvino.stateful import ensure_stateful_is_available, model_has_input_output_name
 from ...exporters.openvino.utils import save_config
-from .. import OVQuantizer
 from .configuration import OVConfig, OVWeightQuantizationConfig
 from .modeling_base import OVBaseModel, OVModelPart
 from .modeling_decoder import CausalLMOutputWithPast, OVModelForCausalLM
@@ -549,6 +548,8 @@ def _from_pretrained(
         )
 
         if to_quantize:
+            from optimum.intel.openvino.quantization import OVQuantizer
+
             quantization_config_copy = copy.deepcopy(quantization_config)
             quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
             potential_processor_id = config.mm_vision_tower if isinstance(model, _OVNanoLlavaForCausalLM) else model_id
```
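
This last hunk is the change named in the commit title: `OVQuantizer`, and the nncf stack behind it, is now imported only on the quantization branch of `_from_pretrained`, so plain VLM inference never touches it. A generic sketch of gating an optional dependency behind the branch that uses it (all names here are illustrative):

```python
def from_pretrained(model_id: str, quantization_config=None) -> dict:
    model = {"id": model_id}  # placeholder for the real loading logic
    if quantization_config is not None:
        # Only callers who request quantization pay for (or can fail on)
        # this import. `statistics` stands in for OVQuantizer's stack.
        import statistics

        model["calibration_mean"] = statistics.fmean([1.0, 2.0, 3.0])
    return model
```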
