From 482cb28a1878a36b1e6f43b0873ed4da92c1781e Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Thu, 5 Dec 2024 23:00:41 +0800 Subject: [PATCH] Fix `tie_word_embeddings` handling for GGUF models (#35085) * fix tie_word_embeddings Signed-off-by: Isotr0py <2037008807@qq.com> * fix Signed-off-by: Isotr0py <2037008807@qq.com> --------- Signed-off-by: Isotr0py <2037008807@qq.com> --- src/transformers/modeling_gguf_pytorch_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py index cca6d548cdf3ac..7562649be753bb 100644 --- a/src/transformers/modeling_gguf_pytorch_utils.py +++ b/src/transformers/modeling_gguf_pytorch_utils.py @@ -291,7 +291,6 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False): # FIXME: Currnetly this implementation is only for flan-t5 architecture. # It needs to be developed for supporting legacy t5. elif "t5" in architecture or "t5encoder" in architecture: - parsed_parameters["config"]["tie_word_embeddings"] = False parsed_parameters["config"]["is_gated_act"] = True updated_architecture = "t5" else: @@ -326,6 +325,12 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False): if architecture + model_size not in GGUF_SUPPORTED_ARCHITECTURES: raise ValueError(f"Architecture {architecture + model_size} not supported") + # Handle tie_word_embeddings: if no "output.weight" tensor (the lm_head weight) is present in the GGUF tensors, + # tie_word_embeddings is True; otherwise it is False. + parsed_parameters["config"]["tie_word_embeddings"] = all( + "output.weight" != tensor.name for tensor in reader.tensors + ) + # List all key-value pairs in a columnized format for gguf_key, field in reader.fields.items(): gguf_key = gguf_key.replace(architecture, updated_architecture)