From 68f91ddfc6bb513586783717ea9f83aaf94b872a Mon Sep 17 00:00:00 2001 From: Saaketh Date: Sat, 21 Sep 2024 21:22:39 -0700 Subject: [PATCH 1/4] Set icl_tasks_required=False when building the eval config in evaluate() --- llmfoundry/command_utils/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llmfoundry/command_utils/eval.py b/llmfoundry/command_utils/eval.py index e644ad1f0f..865b06625e 100644 --- a/llmfoundry/command_utils/eval.py +++ b/llmfoundry/command_utils/eval.py @@ -262,7 +262,7 @@ def evaluate(cfg: DictConfig) -> tuple[list[Trainer], pd.DataFrame]: EvalConfig, EVAL_CONFIG_KEYS, transforms=[allow_toplevel_keys], - icl_tasks_required=True, + icl_tasks_required=False, ) model_configs = eval_config.models From aba03d13467a9dc272e45d9cd939f0d60b5aa617 Mon Sep 17 00:00:00 2001 From: Saaketh Date: Wed, 23 Oct 2024 18:23:24 -0400 Subject: [PATCH 2/4] Run transform_model_and_tokenizer after setting the pretrained model name --- llmfoundry/callbacks/hf_checkpointer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py index 2c4603ea87..2585ef6493 100644 --- a/llmfoundry/callbacks/hf_checkpointer.py +++ b/llmfoundry/callbacks/hf_checkpointer.py @@ -600,12 +600,6 @@ def tensor_hook( new_model_instance.load_state_dict(state_dict, assign=True) del state_dict - # Transform the model and tokenizer before saving - new_model_instance, original_tokenizer = self.transform_model_and_tokenizer( - new_model_instance, - original_tokenizer, - ) - # Ensure that the pretrained model name is correctly set on the saved HF checkpoint. 
if self.pretrained_model_name is not None: new_model_instance.name_or_path = self.pretrained_model_name @@ -616,6 +610,12 @@ def tensor_hook( k ].base_model_name_or_path = self.pretrained_model_name + # Transform the model and tokenizer before saving + new_model_instance, original_tokenizer = self.transform_model_and_tokenizer( + new_model_instance, + original_tokenizer, + ) + log.debug('Saving Hugging Face checkpoint to disk') if upload_to_save_folder: From 65a3873ec316e5754da56228e19c643c041f175e Mon Sep 17 00:00:00 2001 From: Saaketh Date: Thu, 24 Oct 2024 09:56:21 -0400 Subject: [PATCH 3/4] Add temporary debug print of name_or_path before it is set --- llmfoundry/callbacks/hf_checkpointer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py index 2585ef6493..e1cf4dbc31 100644 --- a/llmfoundry/callbacks/hf_checkpointer.py +++ b/llmfoundry/callbacks/hf_checkpointer.py @@ -600,6 +600,8 @@ def tensor_hook( new_model_instance.load_state_dict(state_dict, assign=True) del state_dict + print("new_model_instance name or path BEFORE SETTING:", new_model_instance.name_or_path) + # Ensure that the pretrained model name is correctly set on the saved HF checkpoint. 
if self.pretrained_model_name is not None: new_model_instance.name_or_path = self.pretrained_model_name From b244d5efc07ad2f626e65d33a880f655cf9c1586 Mon Sep 17 00:00:00 2001 From: Saaketh Date: Thu, 24 Oct 2024 10:04:24 -0400 Subject: [PATCH 4/4] Remove temporary debug print of name_or_path --- llmfoundry/callbacks/hf_checkpointer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py index e1cf4dbc31..2585ef6493 100644 --- a/llmfoundry/callbacks/hf_checkpointer.py +++ b/llmfoundry/callbacks/hf_checkpointer.py @@ -600,8 +600,6 @@ def tensor_hook( new_model_instance.load_state_dict(state_dict, assign=True) del state_dict - print("new_model_instance name or path BEFORE SETTING:", new_model_instance.name_or_path) - # Ensure that the pretrained model name is correctly set on the saved HF checkpoint. if self.pretrained_model_name is not None: new_model_instance.name_or_path = self.pretrained_model_name