
Commit a241a7d
fix testing condition for 2024.6 and unpatch in case if failed
eaidova committed Dec 20, 2024
1 parent 9ddc5a8 commit a241a7d
Showing 2 changed files with 132 additions and 126 deletions.
252 changes: 129 additions & 123 deletions optimum/exporters/openvino/__main__.py
@@ -355,144 +355,150 @@ class StoreAttr(object):
            loading_kwargs["torch_dtype"] = dtype
            patch_16bit = True

-    if library_name == "open_clip":
-        model = _OpenClipForZeroShotImageClassification.from_pretrained(model_name_or_path, cache_dir=cache_dir)
-    else:
-        model = TasksManager.get_model_from_task(
-            task,
-            model_name_or_path,
-            subfolder=subfolder,
-            revision=revision,
-            cache_dir=cache_dir,
-            token=token,
-            local_files_only=local_files_only,
-            force_download=force_download,
-            trust_remote_code=trust_remote_code,
-            framework=framework,
-            device=device,
-            library_name=library_name,
-            **loading_kwargs,
-        )
+    try:
+        if library_name == "open_clip":
+            model = _OpenClipForZeroShotImageClassification.from_pretrained(model_name_or_path, cache_dir=cache_dir)
+        else:
+            model = TasksManager.get_model_from_task(
+                task,
+                model_name_or_path,
+                subfolder=subfolder,
+                revision=revision,
+                cache_dir=cache_dir,
+                token=token,
+                local_files_only=local_files_only,
+                force_download=force_download,
+                trust_remote_code=trust_remote_code,
+                framework=framework,
+                device=device,
+                library_name=library_name,
+                **loading_kwargs,
+            )

-    needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
+        needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None

-    if needs_pad_token_id:
-        if pad_token_id is not None:
-            model.config.pad_token_id = pad_token_id
-        else:
-            tok = AutoTokenizer.from_pretrained(model_name_or_path)
-            pad_token_id = getattr(tok, "pad_token_id", None)
-            if pad_token_id is None:
-                raise ValueError(
-                    "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument"
-                )
-            model.config.pad_token_id = pad_token_id
+        if needs_pad_token_id:
+            if pad_token_id is not None:
+                model.config.pad_token_id = pad_token_id
+            else:
+                tok = AutoTokenizer.from_pretrained(model_name_or_path)
+                pad_token_id = getattr(tok, "pad_token_id", None)
+                if pad_token_id is None:
+                    raise ValueError(
+                        "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument"
+                    )
+                model.config.pad_token_id = pad_token_id

-    if hasattr(model.config, "export_model_type"):
-        model_type = model.config.export_model_type.replace("_", "-")
-    else:
-        model_type = model.config.model_type.replace("_", "-")
+        if hasattr(model.config, "export_model_type"):
+            model_type = model.config.export_model_type.replace("_", "-")
+        else:
+            model_type = model.config.model_type.replace("_", "-")

-    if (
-        not custom_architecture
-        and library_name != "diffusers"
-        and task + "-with-past"
-        in TasksManager.get_supported_tasks_for_model_type(model_type, exporter="openvino", library_name=library_name)
-    ):
-        # Make -with-past the default if --task was not explicitely specified
-        if original_task == "auto":
-            task = task + "-with-past"
-        else:
-            logger.info(
-                f"The task `{task}` was manually specified, and past key values will not be reused in the decoding."
-                f" if needed, please pass `--task {task}-with-past` to export using the past key values."
-            )
+        if (
+            not custom_architecture
+            and library_name != "diffusers"
+            and task + "-with-past"
+            in TasksManager.get_supported_tasks_for_model_type(
+                model_type, exporter="openvino", library_name=library_name
+            )
+        ):
+            # Make -with-past the default if --task was not explicitely specified
+            if original_task == "auto":
+                task = task + "-with-past"
+            else:
+                logger.info(
+                    f"The task `{task}` was manually specified, and past key values will not be reused in the decoding."
+                    f" if needed, please pass `--task {task}-with-past` to export using the past key values."
+                )

-    if original_task == "auto":
-        synonyms_for_task = sorted(TasksManager.synonyms_for_task(task))
-        if synonyms_for_task:
-            synonyms_for_task = ", ".join(synonyms_for_task)
-            possible_synonyms = f" (possible synonyms are: {synonyms_for_task})"
-        else:
-            possible_synonyms = ""
-        logger.info(f"Automatic task detection to {task}{possible_synonyms}.")
+        if original_task == "auto":
+            synonyms_for_task = sorted(TasksManager.synonyms_for_task(task))
+            if synonyms_for_task:
+                synonyms_for_task = ", ".join(synonyms_for_task)
+                possible_synonyms = f" (possible synonyms are: {synonyms_for_task})"
+            else:
+                possible_synonyms = ""
+            logger.info(f"Automatic task detection to {task}{possible_synonyms}.")

-    preprocessors = maybe_load_preprocessors(
-        model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
-    )
+        preprocessors = maybe_load_preprocessors(
+            model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
+        )

-    submodel_paths = export_from_model(
-        model=model,
-        output=output,
-        task=task,
-        ov_config=ov_config,
-        stateful=stateful,
-        model_kwargs=model_kwargs,
-        custom_export_configs=custom_export_configs,
-        fn_get_submodels=fn_get_submodels,
-        preprocessors=preprocessors,
-        device=device,
-        trust_remote_code=trust_remote_code,
-        patch_16bit_model=patch_16bit,
-        **kwargs_shapes,
-    )
+        submodel_paths = export_from_model(
+            model=model,
+            output=output,
+            task=task,
+            ov_config=ov_config,
+            stateful=stateful,
+            model_kwargs=model_kwargs,
+            custom_export_configs=custom_export_configs,
+            fn_get_submodels=fn_get_submodels,
+            preprocessors=preprocessors,
+            device=device,
+            trust_remote_code=trust_remote_code,
+            patch_16bit_model=patch_16bit,
+            **kwargs_shapes,
+        )

-    if convert_tokenizer:
-        maybe_convert_tokenizers(library_name, output, model, preprocessors, task=task)
+        if convert_tokenizer:
+            maybe_convert_tokenizers(library_name, output, model, preprocessors, task=task)

-    clear_class_registry()
-    del model
-    gc.collect()
+        clear_class_registry()
+        del model
+        gc.collect()

-    for submodel_path in submodel_paths:
-        submodel_path = Path(output) / submodel_path
-        submodel = core.read_model(submodel_path)
+        for submodel_path in submodel_paths:
+            submodel_path = Path(output) / submodel_path
+            submodel = core.read_model(submodel_path)

-        quantization_config = None
-        if ov_config is None:
-            num_parameters = 0
-            for op in submodel.get_ops():
-                if op.get_type_name() == "Constant" and op.get_element_type() in [Type.f16, Type.f32, Type.bf16]:
-                    num_parameters += reduce(operator.mul, op.shape, 1)
-                del op
-                if num_parameters >= _MAX_UNCOMPRESSED_SIZE:
-                    if is_nncf_available():
-                        quantization_config = {"bits": 8, "sym": False}
-                        logger.info("The model weights will be quantized to int8_asym.")
-                    else:
-                        logger.warning(
-                            "The model will be converted with no weights quantization. Quantization of the weights to int8 "
-                            "requires nncf. Please install it with `pip install nncf`"
-                        )
-                    break
-        else:
-            quantization_config = ov_config.quantization_config
-        if quantization_config is None:
-            del submodel
-            gc.collect()
-            continue
+            quantization_config = None
+            if ov_config is None:
+                num_parameters = 0
+                for op in submodel.get_ops():
+                    if op.get_type_name() == "Constant" and op.get_element_type() in [Type.f16, Type.f32, Type.bf16]:
+                        num_parameters += reduce(operator.mul, op.shape, 1)
+                    del op
+                    if num_parameters >= _MAX_UNCOMPRESSED_SIZE:
+                        if is_nncf_available():
+                            quantization_config = {"bits": 8, "sym": False}
+                            logger.info("The model weights will be quantized to int8_asym.")
+                        else:
+                            logger.warning(
+                                "The model will be converted with no weights quantization. Quantization of the weights to int8 "
+                                "requires nncf. Please install it with `pip install nncf`"
+                            )
+                        break
+            else:
+                quantization_config = ov_config.quantization_config
+            if quantization_config is None:
+                del submodel
+                gc.collect()
+                continue

-        if not is_nncf_available():
-            raise ImportError("Quantization of the weights requires nncf, please install it with `pip install nncf`")
+            if not is_nncf_available():
+                raise ImportError(
+                    "Quantization of the weights requires nncf, please install it with `pip install nncf`"
+                )

-        from optimum.intel.openvino.quantization import _weight_only_quantization
+            from optimum.intel.openvino.quantization import _weight_only_quantization

-        _weight_only_quantization(submodel, quantization_config)
-        compressed_submodel_path = submodel_path.parent / f"{submodel_path.stem}_compressed.xml"
-        save_model(submodel, compressed_submodel_path, compress_to_fp16=False)
-        del submodel
-        gc.collect()
+            _weight_only_quantization(submodel, quantization_config)
+            compressed_submodel_path = submodel_path.parent / f"{submodel_path.stem}_compressed.xml"
+            save_model(submodel, compressed_submodel_path, compress_to_fp16=False)
+            del submodel
+            gc.collect()

-        submodel_path.unlink()
-        submodel_path.with_suffix(".bin").unlink()
-        compressed_submodel_path.rename(submodel_path)
-        compressed_submodel_path.with_suffix(".bin").rename(submodel_path.with_suffix(".bin"))
+            submodel_path.unlink()
+            submodel_path.with_suffix(".bin").unlink()
+            compressed_submodel_path.rename(submodel_path)
+            compressed_submodel_path.with_suffix(".bin").rename(submodel_path.with_suffix(".bin"))

-    # Unpatch modules after GPTQ export
-    if do_quant_patching:
-        torch.cuda.is_available = orig_cuda_check
-    if do_gptq_patching:
-        GPTQQuantizer.post_init_model = orig_post_init_model
+    finally:
+        # Unpatch modules after quantized model export
+        if do_quant_patching:
+            torch.cuda.is_available = orig_cuda_check
+        if do_gptq_patching:
+            GPTQQuantizer.post_init_model = orig_post_init_model


def maybe_convert_tokenizers(library_name: str, output: Path, model=None, preprocessors=None, task=None):
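The substantive change above is that model loading, export, and weight compression now run inside a try block whose finally clause restores the monkey-patched globals (torch.cuda.is_available and GPTQQuantizer.post_init_model), so a failed export no longer leaves the process patched. A minimal sketch of that save/patch/restore-in-finally pattern, not the repository's code (the helper name, callback, and stub value below are illustrative):

import torch


def export_with_patches(run_export, patched_cuda_check=lambda: True):
    # Save the original and install a temporary stub for the duration of the export.
    # (Which stub the real exporter installs depends on the quantization path; only
    # the save/patch/restore shape matters here.)
    orig_cuda_check = torch.cuda.is_available
    torch.cuda.is_available = patched_cuda_check
    try:
        return run_export()
    finally:
        # Runs whether run_export() returned or raised, so later code in the same
        # process always sees the real torch.cuda.is_available again.
        torch.cuda.is_available = orig_cuda_check


# Usage sketch: export_with_patches(lambda: run_my_export())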
6 changes: 3 additions & 3 deletions tests/openvino/test_modeling.py
@@ -921,9 +921,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
    if platform.system() != "Windows":
        SUPPORTED_ARCHITECTURES += ("opt_gptq",)

-        # autoawq install disabled for windows test environment
-        if is_openvino_version(">=", "2024.6.0") and platform.system() != "Windows":
-            SUPPORTED_ARCHITECTURES += ("mixtral_awq",)
+    # autoawq install disabled for windows test environment
+    if is_openvino_version(">=", "2024.6.0") and platform.system() != "Windows":
+        SUPPORTED_ARCHITECTURES += ("mixtral_awq",)

    GENERATION_LENGTH = 100
    REMOTE_CODE_MODELS = (
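The test-file change is whitespace-only: the three mixtral_awq lines are re-indented so that the 2024.6 gate is evaluated at class level next to the other architecture gates rather than inside the Windows-only block. A rough sketch of the resulting gating logic, assuming is_openvino_version is importable from optimum.intel.utils.import_utils as in the test suite (the class name and base tuple below are illustrative):

import platform

from optimum.intel.utils.import_utils import is_openvino_version


class CausalLMTestSketch:
    # Illustrative base set; the real test class lists many more architectures.
    SUPPORTED_ARCHITECTURES = ("llama", "gpt2")

    # opt_gptq is skipped on Windows in the real suite.
    if platform.system() != "Windows":
        SUPPORTED_ARCHITECTURES += ("opt_gptq",)

    # autoawq install disabled for windows test environment; mixtral_awq also needs
    # OpenVINO >= 2024.6, so this gate now sits at class level, not inside the block above.
    if is_openvino_version(">=", "2024.6.0") and platform.system() != "Windows":
        SUPPORTED_ARCHITECTURES += ("mixtral_awq",)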
