From 5af0206e449435b9601b5dc3a5f270149f23940f Mon Sep 17 00:00:00 2001
From: eaidova
Date: Wed, 4 Dec 2024 09:08:42 +0400
Subject: [PATCH] fix code style

---
 optimum/exporters/openvino/model_configs.py  |  6 +++---
 optimum/exporters/openvino/model_patcher.py  | 21 +++++++++----------
 optimum/exporters/openvino/utils.py          | 10 ++++++++-
 .../openvino/modeling_visual_language.py     |  1 -
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 4ec92a6302..e6bbb2fc48 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -112,9 +112,9 @@ def init_model_configs():
         "transformers",
         "Qwen2VLForConditionalGeneration",
     )
-    TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["image-text-to-text"] = (
-        TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]
-    )
+    TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS[
+        "image-text-to-text"
+    ] = TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS["text-generation"]
 
     supported_model_types = [
         "_SUPPORTED_MODEL_TYPE",
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index af830849eb..73b9ce7cef 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -423,9 +423,9 @@ def _llama_gemma_update_causal_mask_legacy(self, attention_mask, input_tensor, c
                 offset = 0
             mask_shape = attention_mask.shape
             mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
-            causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
-                mask_slice
-            )
+            causal_mask[
+                : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
+            ] = mask_slice
 
     if (
         self.config._attn_implementation == "sdpa"
@@ -2060,9 +2060,9 @@ def _dbrx_update_causal_mask_legacy(
                 offset = 0
             mask_shape = attention_mask.shape
             mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
-            causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
-                mask_slice
-            )
+            causal_mask[
+                : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
+            ] = mask_slice
 
     if (
         self.config._attn_implementation == "sdpa"
@@ -3386,10 +3386,9 @@ class Qwen2VLLanguageModelPatcher(DecoderModelPatcher):
     def __init__(
         self,
         config: OnnxConfig,
-        model: PreTrainedModel | TFPreTrainedModel,
-        model_kwargs: Dict[str, Any] | None = None,
+        model: Union[PreTrainedModel, TFPreTrainedModel],
+        model_kwargs: Dict[str, Any] = None,
     ):
-
         model.__orig_forward = model.forward
 
         def forward_wrap(
@@ -3426,8 +3425,8 @@ class Qwen2VLVisionEmbMergerPatcher(ModelPatcher):
     def __init__(
         self,
         config: OnnxConfig,
-        model: PreTrainedModel | TFPreTrainedModel,
-        model_kwargs: Dict[str, Any] | None = None,
+        model: Union[PreTrainedModel, TFPreTrainedModel],
+        model_kwargs: Dict[str, Any] = None,
     ):
         model.__orig_forward = model.forward
 
diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py
index 5242db1d1a..9891395a38 100644
--- a/optimum/exporters/openvino/utils.py
+++ b/optimum/exporters/openvino/utils.py
@@ -216,7 +216,15 @@ def get_submodels(model):
     return custom_export, fn_get_submodels
 
 
-MULTI_MODAL_TEXT_GENERATION_MODELS = ["llava", "llava-next", "llava-qwen2", "internvl-chat", "minicpmv", "phi3-v", "qwen2-vl"]
+MULTI_MODAL_TEXT_GENERATION_MODELS = [
+    "llava",
+    "llava-next",
+    "llava-qwen2",
+    "internvl-chat",
+    "minicpmv",
+    "phi3-v",
+    "qwen2-vl",
+]
 
 
 def save_config(config, save_dir):
diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 7ca3a0cf15..856a85d184 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -2354,7 +2354,6 @@ def get_multimodal_embeddings(
         video_grid_thw=None,
         **kwargs,
     ):
-
         inputs_embeds = torch.from_numpy(self.get_text_embeddings(input_ids))
         if pixel_values is not None and input_ids.shape[1] != 1:
             image_embeds = torch.from_numpy(self.get_vision_embeddings(pixel_values, image_grid_thw))