From 2b9f682864ee9515df0c03e2886a17c024b25df5 Mon Sep 17 00:00:00 2001 From: Charles Tang Date: Thu, 5 Dec 2024 11:41:01 -0800 Subject: [PATCH] Fix llama3 example yamls (#1688) Co-authored-by: Chuck Tang --- mcli/mcli-llama2-finetune.yaml | 1 + mcli/mcli-llama3-70b-instruct-finetune.yaml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml index d675cb9767..24586c317d 100644 --- a/mcli/mcli-llama2-finetune.yaml +++ b/mcli/mcli-llama2-finetune.yaml @@ -8,6 +8,7 @@ integrations: command: | cd llm-foundry/scripts + export HF_HUB_ENABLE_HF_TRANSFER=1 composer train/train.py /mnt/config/parameters.yaml image: mosaicml/llm-foundry:2.5.1_cu124-latest name: llama2-finetune diff --git a/mcli/mcli-llama3-70b-instruct-finetune.yaml b/mcli/mcli-llama3-70b-instruct-finetune.yaml index 1bb3f17b01..10c040808d 100644 --- a/mcli/mcli-llama3-70b-instruct-finetune.yaml +++ b/mcli/mcli-llama3-70b-instruct-finetune.yaml @@ -8,6 +8,7 @@ integrations: command: | cd llm-foundry/scripts + export HF_HUB_ENABLE_HF_TRANSFER=1 composer train/train.py /mnt/config/parameters.yaml image: mosaicml/llm-foundry:2.5.1_cu124-latest name: llama3.1-70b-finetune @@ -31,6 +32,7 @@ parameters: run_name: # If left blank, will be read from env var $RUN_NAME max_split_size_mb: 512 + dist_timeout: 3600 # set to avoid NCCL timeouts # Model model: