diff --git a/mcli/mcli-llama2-finetune.yaml b/mcli/mcli-llama2-finetune.yaml
index d675cb9767..24586c317d 100644
--- a/mcli/mcli-llama2-finetune.yaml
+++ b/mcli/mcli-llama2-finetune.yaml
@@ -8,6 +8,7 @@ integrations:
 
 command: |
   cd llm-foundry/scripts
+  export HF_HUB_ENABLE_HF_TRANSFER=1
   composer train/train.py /mnt/config/parameters.yaml
 image: mosaicml/llm-foundry:2.5.1_cu124-latest
 name: llama2-finetune
diff --git a/mcli/mcli-llama3-70b-instruct-finetune.yaml b/mcli/mcli-llama3-70b-instruct-finetune.yaml
index 1bb3f17b01..10c040808d 100644
--- a/mcli/mcli-llama3-70b-instruct-finetune.yaml
+++ b/mcli/mcli-llama3-70b-instruct-finetune.yaml
@@ -8,6 +8,7 @@ integrations:
 
 command: |
   cd llm-foundry/scripts
+  export HF_HUB_ENABLE_HF_TRANSFER=1
   composer train/train.py /mnt/config/parameters.yaml
 image: mosaicml/llm-foundry:2.5.1_cu124-latest
 name: llama3.1-70b-finetune
@@ -31,6 +32,7 @@ parameters:
 
   run_name:  # If left blank, will be read from env var $RUN_NAME
   max_split_size_mb: 512
+  dist_timeout: 3600 # set to avoid NCCL timeouts
 
   # Model
   model: