From 5c050e43c9b79ae32588acff63e5d5b68db88328 Mon Sep 17 00:00:00 2001
From: Alessandra Stramiglio <82500224+alestrami@users.noreply.github.com>
Date: Thu, 19 Dec 2024 11:32:19 +0100
Subject: [PATCH 1/3] Fix argument and target on different devices (CUDA)

---
 src/transformers/models/llama/modeling_llama.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py
index 5be33c26414cd7..2122a246d5bb7d 100644
--- a/src/transformers/models/llama/modeling_llama.py
+++ b/src/transformers/models/llama/modeling_llama.py
@@ -958,6 +958,7 @@ def forward(
 
         loss = None
         if labels is not None:
+            labels = labels.to(logits.device)
             loss = self.loss_function(logits=logits, labels=labels, pooled_logits=pooled_logits, config=self.config)
 
         if not return_dict:

From 3c4d751f4f0131cc743fd8dbbd2c4e296c3868a7 Mon Sep 17 00:00:00 2001
From: Alessandra Stramiglio <82500224+alestrami@users.noreply.github.com>
Date: Fri, 20 Dec 2024 13:28:05 +0100
Subject: [PATCH 2/3] Fix metadata.version lookup for awq

---
 src/transformers/utils/import_utils.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
index 32a647594741dd..9db215fbc56c0d 100755
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -46,6 +46,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
     if package_exists:
         try:
             # Primary method to get the package version
+            if pkg_name == "awq": pkg_name = "autoawq"
             package_version = importlib.metadata.version(pkg_name)
         except importlib.metadata.PackageNotFoundError:
             # Fallback method: Only for "torch" and versions containing "dev"
@@ -93,11 +94,13 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
 GGUF_MIN_VERSION = "0.10.0"
 XLA_FSDPV2_MIN_VERSION = "2.2.0"
 HQQ_MIN_VERSION = "0.2.1"
+VPTQ_MIN_VERSION = "0.0.4"
 
 
 _accelerate_available, _accelerate_version = _is_package_available("accelerate", return_version=True)
 _apex_available = _is_package_available("apex")
 _aqlm_available = _is_package_available("aqlm")
+_vptq_available, _vptq_version = _is_package_available("vptq", return_version=True)
 _av_available = importlib.util.find_spec("av") is not None
 _bitsandbytes_available = _is_package_available("bitsandbytes")
 _eetq_available = _is_package_available("eetq")
@@ -192,7 +195,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[
 _tiktoken_available = _is_package_available("tiktoken")
 _blobfile_available = _is_package_available("blobfile")
 _liger_kernel_available = _is_package_available("liger_kernel")
-
+_triton_available = _is_package_available("triton")
 
 _torch_version = "N/A"
 _torch_available = False
@@ -816,6 +819,10 @@ def is_aqlm_available():
     return _aqlm_available
 
 
+def is_vptq_available(min_version: str = VPTQ_MIN_VERSION):
+    return _vptq_available and version.parse(_vptq_version) >= version.parse(min_version)
+
+
 def is_av_available():
     return _av_available
 
@@ -1243,6 +1250,10 @@ def is_liger_kernel_available():
     return version.parse(importlib.metadata.version("liger_kernel")) >= version.parse("0.3.0")
 
 
+def is_triton_available():
+    return _triton_available
+
+
 # docstyle-ignore
 AV_IMPORT_ERROR = """
 {0} requires the PyAv library but it was not found in your environment. You can install it with:
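Note on PATCH 2/3: the remap works around a PyPI naming quirk. The distribution is published as `autoawq`, but the import package it installs is `awq`, so `importlib.metadata.version("awq")` raises `PackageNotFoundError` even when AWQ is fully usable. A minimal sketch of the mismatch, assuming `autoawq` is installed from PyPI (the names come from PyPI, not from this diff):

```python
# Sketch of the import-name vs. distribution-name mismatch patch 2 remaps.
# Assumes the PyPI distribution "autoawq" is installed.
import importlib.metadata

try:
    # "awq" is the import package, not a registered distribution name.
    importlib.metadata.version("awq")
except importlib.metadata.PackageNotFoundError:
    # The distribution that provides the "awq" module is "autoawq".
    print(importlib.metadata.version("autoawq"))
```

On Python 3.10+, `importlib.metadata.packages_distributions()` exposes this import-name-to-distribution mapping generically, which would avoid hard-coding the special case.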
From b71093c693498b17e7691b422f750287870acb4b Mon Sep 17 00:00:00 2001
From: Alessandra Stramiglio <82500224+alestrami@users.noreply.github.com>
Date: Fri, 20 Dec 2024 15:25:43 +0100
Subject: [PATCH 3/3] Fix CompileConfig import

---
 src/transformers/modeling_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 49d086c76e8683..50984736b45a19 100755
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -43,7 +43,8 @@
 from .activations import get_activation
 from .configuration_utils import PretrainedConfig
 from .dynamic_module_utils import custom_object_save
-from .generation import CompileConfig, GenerationConfig, GenerationMixin
+from .generation.configuration_utils import CompileConfig, GenerationConfig
+from .generation import GenerationMixin
 from .integrations import PeftAdapterMixin, deepspeed_config, is_deepspeed_zero3_enabled
 from .integrations.flash_attention import flash_attention_forward
 from .integrations.flex_attention import flex_attention_forward
@@ -54,7 +55,6 @@
     apply_chunking_to_forward,
     find_pruneable_heads_and_indices,
     id_tensor_storage,
-    is_torch_greater_or_equal_than_1_13,
     prune_conv1d_layer,
     prune_layer,
     prune_linear_layer,
@@ -476,7 +476,7 @@ def load_sharded_checkpoint(model, folder, strict=True, prefer_safe=True):
         error_message += f"\nMissing key(s): {str_unexpected_keys}."
         raise RuntimeError(error_message)
 
-    weights_only_kwarg = {"weights_only": True} if is_torch_greater_or_equal_than_1_13 else {}
+    weights_only_kwarg = {"weights_only": True}
     loader = safe_load_file if load_safe else partial(torch.load, map_location="cpu", **weights_only_kwarg)
 
     for shard_file in shard_files:
@@ -532,7 +532,7 @@ def load_state_dict(
         and is_zipfile(checkpoint_file)
     ):
         extra_args = {"mmap": True}
-        weights_only_kwarg = {"weights_only": weights_only} if is_torch_greater_or_equal_than_1_13 else {}
+        weights_only_kwarg = {"weights_only": weights_only}
         return torch.load(
             checkpoint_file,
             map_location=map_location,
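Note on PATCH 3/3: importing `CompileConfig` and `GenerationConfig` from `generation.configuration_utils` directly, rather than through the `generation` package `__init__`, presumably sidesteps a circular import at module-initialization time. Dropping the `is_torch_greater_or_equal_than_1_13` guard assumes every supported torch version accepts `weights_only`, which `torch.load` has supported since 1.13. A minimal sketch of the behavior now relied on unconditionally (the file name is illustrative):

```python
# Sketch of the torch.load(..., weights_only=True) path patch 3 assumes.
import torch

torch.save({"weight": torch.ones(2, 2)}, "checkpoint.bin")

# weights_only=True restricts unpickling to tensors and primitive containers,
# rejecting arbitrary Python objects embedded in an untrusted checkpoint.
state = torch.load("checkpoint.bin", map_location="cpu", weights_only=True)
print(state["weight"].shape)  # torch.Size([2, 2])
```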