diff --git a/src/transformers/modeling_gguf_pytorch_utils.py b/src/transformers/modeling_gguf_pytorch_utils.py
index cca6d548cdf3ac..7562649be753bb 100644
--- a/src/transformers/modeling_gguf_pytorch_utils.py
+++ b/src/transformers/modeling_gguf_pytorch_utils.py
@@ -291,7 +291,6 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
     # FIXME: Currnetly this implementation is only for flan-t5 architecture.
     # It needs to be developed for supporting legacy t5.
     elif "t5" in architecture or "t5encoder" in architecture:
-        parsed_parameters["config"]["tie_word_embeddings"] = False
         parsed_parameters["config"]["is_gated_act"] = True
         updated_architecture = "t5"
     else:
@@ -326,6 +325,12 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
     if architecture + model_size not in GGUF_SUPPORTED_ARCHITECTURES:
         raise ValueError(f"Architecture {architecture + model_size} not supported")

+    # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
+    # tie_word_embeddings is true otherwise false
+    parsed_parameters["config"]["tie_word_embeddings"] = all(
+        "output.weight" != tensor.name for tensor in reader.tensors
+    )
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
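
The diff replaces the hardcoded `tie_word_embeddings = False` for T5 with a generic check: GGUF checkpoints that tie the input embeddings to the output head simply omit the `output.weight` tensor (GGUF's name for `lm_head.weight`), so the flag can be inferred from the tensor names alone. A minimal sketch of that inference, using a stand-in `tensors` list instead of a real `gguf.GGUFReader` (the `Tensor` namedtuple and `infer_tie_word_embeddings` helper below are hypothetical, for illustration only):

```python
from collections import namedtuple

# Stand-in for a GGUF tensor record; only the name matters for this check.
Tensor = namedtuple("Tensor", ["name"])


def infer_tie_word_embeddings(tensors):
    # If no tensor is named "output.weight" (GGUF's name for lm_head.weight),
    # the checkpoint relies on weight tying, so tie_word_embeddings is True.
    return all(tensor.name != "output.weight" for tensor in tensors)


# Checkpoint without a separate output head -> embeddings are tied.
print(infer_tie_word_embeddings([Tensor("token_embd.weight")]))  # True

# Checkpoint that ships its own lm_head weights -> not tied.
print(infer_tie_word_embeddings([Tensor("token_embd.weight"), Tensor("output.weight")]))  # False
```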