Merge branch 'main' into add_openai_wrapper
bmosaicml authored Sep 13, 2023
2 parents 79ca900 + e75cfc9 commit f613b3b
Showing 55 changed files with 1,623 additions and 509 deletions.
.github/workflows/regression_yamls/eval-7b-composer.yaml (46 additions, 0 deletions)
@@ -0,0 +1,46 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts/
composer eval/eval.py /mnt/config/parameters.yaml
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-7b-composer-eval-regression

compute:
gpus: 8

parameters:
run_name: mpt-7b-composer-eval-regression
seed: 1
max_seq_len: 1024

models:
-
model_name: mosaicml/mpt-7b
model:
name: hf_causal_lm
pretrained_model_name_or_path: mosaicml/mpt-7b
init_device: cpu
pretrained: true
tokenizer:
name: mosaicml/mpt-7b
kwargs:
model_max_length: ${max_seq_len}
load_path: 'FILL IN'

device_eval_batch_size: 4
precision: amp_fp16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: FULL
forward_prefetch: True
limit_all_gathers: True

icl_subset_num_batches: 20
icl_tasks: 'eval/yamls/tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
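
Note: the ${max_seq_len} reference inside the tokenizer kwargs is resolved against the top-level max_seq_len parameter when the config is loaded. A minimal sketch of that behavior, assuming OmegaConf-style interpolation (the loading path llm-foundry's scripts rely on):

# Minimal sketch (assumption: the YAML is loaded with OmegaConf, whose
# ${...} interpolation resolves references against top-level keys).
from omegaconf import OmegaConf

cfg = OmegaConf.create("""
max_seq_len: 1024
tokenizer:
  name: mosaicml/mpt-7b
  kwargs:
    model_max_length: ${max_seq_len}
""")
# Interpolation resolves on access, so model_max_length comes back as 1024.
assert cfg.tokenizer.kwargs.model_max_length == 1024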
.github/workflows/regression_yamls/eval-7b-hf.yaml (45 additions, 0 deletions)
@@ -0,0 +1,45 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts/
composer eval/eval.py /mnt/config/parameters.yaml
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-7b-hf-eval-regression

compute:
gpus: 8

parameters:
run_name: mpt-7b-hf-eval-regression
seed: 1
max_seq_len: 1024

models:
-
model_name: mosaicml/mpt-7b
model:
name: hf_causal_lm
pretrained_model_name_or_path: mosaicml/mpt-7b
init_device: cpu
pretrained: true
tokenizer:
name: mosaicml/mpt-7b
kwargs:
model_max_length: ${max_seq_len}

device_eval_batch_size: 4
precision: amp_fp16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: FULL
forward_prefetch: True
limit_all_gathers: True

icl_subset_num_batches: 20
icl_tasks: 'eval/yamls/tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
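
For orientation, the model/tokenizer section above corresponds roughly to loading the checkpoint directly from the Hugging Face Hub. A hedged sketch in plain transformers (an illustration only, not the Composer model wrapper that eval.py actually builds from this config):

# Rough equivalent of the hf_causal_lm + tokenizer entries above.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "mosaicml/mpt-7b",
    model_max_length=1024,  # mirrors kwargs.model_max_length: ${max_seq_len}
)
model = AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b",
    trust_remote_code=True,  # MPT ships custom modeling code on the Hub
)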
.github/workflows/regression_yamls/llama2-finetune.yaml (121 additions, 0 deletions)
@@ -0,0 +1,121 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts
composer train/train.py /mnt/config/parameters.yaml
python inference/convert_composer_to_hf.py \
--composer_path ./llama2-finetune-regression/checkpoints/latest-rank0.pt \
--hf_output_path ./hf-checkpoints/
image: mosaicml/llm-foundry:1.13.1_cu117-latest
name: llama2-finetune-regression

compute:
gpus: 8

parameters:
tokenizer_name: meta-llama/Llama-2-13b-hf
max_seq_len: 4096
global_seed: 17

run_name: llama2-finetune-regression
max_split_size_mb: 512

model:
name: hf_causal_lm
init_device: mixed
pretrained_model_name_or_path: meta-llama/Llama-2-13b-hf
pretrained: true
use_auth_token: true
attention_patch_type: triton

tokenizer:
name: ${tokenizer_name}
kwargs:
model_max_length: ${max_seq_len}

train_loader:
name: finetuning
dataset:
hf_name: mosaicml/dolly_hhrlhf
split: train
max_seq_len: ${max_seq_len}
allow_pad_trimming: false
decoder_only_format: true
shuffle: true
drop_last: true
num_workers: 8
pin_memory: false
prefetch_factor: 2
persistent_workers: true
timeout: 0

eval_loader:
name: finetuning
dataset:
hf_name: mosaicml/dolly_hhrlhf
split: test
max_seq_len: ${max_seq_len}
allow_pad_trimming: false
decoder_only_format: true
shuffle: false
drop_last: true
num_workers: 8
pin_memory: false
prefetch_factor: 2
persistent_workers: true
timeout: 0

scheduler:
name: cosine_with_warmup
t_warmup: 100ba
alpha_f: 0.1

optimizer:
name: decoupled_lionw
lr: 5.0e-7
betas:
- 0.9
- 0.95
weight_decay: 0.0

algorithms:
gradient_clipping:
clipping_type: norm
clipping_threshold: 1.0

max_duration: 50ba
eval_first: false
eval_interval: 1ep
eval_subset_num_batches: -1
global_train_batch_size: 64

seed: ${global_seed}
device_eval_batch_size: 8
device_train_microbatch_size: 8
precision: amp_bf16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: PURE
activation_checkpointing: true
activation_checkpointing_reentrant: false
activation_cpu_offload: false
limit_all_gathers: true
verbose: false

progress_bar: false
log_to_console: true
console_log_interval: 1ba

callbacks:
speed_monitor:
window_size: 10
lr_monitor: {}
memory_monitor: {}
runtime_estimator: {}

save_folder: ./{run_name}/checkpoints
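
The train and eval loaders above both read mosaicml/dolly_hhrlhf straight from the Hugging Face Hub. A quick way to inspect what the finetuning loader will see (a sketch assuming the datasets package; the printed column names are whatever the Hub copy actually defines):

# Peek at the finetuning data referenced by hf_name above.
from datasets import load_dataset

ds = load_dataset("mosaicml/dolly_hhrlhf", split="train")
print(ds.column_names)  # expected to be prompt/response-style fields
print(ds[0])            # one raw example, before any tokenization or packing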
.github/workflows/regression_yamls/mpt-125m-chinchilla.yaml (122 additions, 0 deletions)
@@ -0,0 +1,122 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts
python data_prep/convert_dataset_hf.py \
--dataset c4 --data_subset en \
--out_root ./my-copy-c4 --splits train val \
--concat_tokens 2048 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
composer train/train.py /mnt/config/parameters.yaml
python inference/convert_composer_to_hf.py \
--composer_path ./mpt-125m-chinchilla-regression/checkpoints/latest-rank0.pt \
--hf_output_path ./hf-checkpoints/
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-125m-chinchilla-regression

compute:
gpus: 8

parameters:
run_name: mpt-125m-chinchilla-regression
data_local: ./my-copy-c4
data_remote:
max_seq_len: 2048
global_seed: 17

model:
name: mpt_causal_lm
init_device: meta
d_model: 768
n_heads: 12
n_layers: 12
expansion_ratio: 4
max_seq_len: ${max_seq_len}
vocab_size: 50368
attn_config:
attn_impl: triton

tokenizer:
name: EleutherAI/gpt-neox-20b
kwargs:
model_max_length: ${max_seq_len}

train_loader:
name: text
dataset:
local: ${data_local}
remote: ${data_remote}
split: train
shuffle: true
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
drop_last: true
num_workers: 8

eval_loader:
name: text
dataset:
local: ${data_local}
remote: ${data_remote}
split: val
shuffle: false
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
drop_last: false
num_workers: 8

scheduler:
name: cosine_with_warmup
t_warmup: 100ba
alpha_f: 0.1

optimizer:
name: decoupled_adamw
lr: 6.0e-4
betas:
- 0.9
- 0.95
eps: 1.0e-08
weight_decay: 0.0

algorithms:
gradient_clipping:
clipping_type: norm
clipping_threshold: 1.0

max_duration: 4800ba
eval_interval: 500ba
eval_first: false
eval_subset_num_batches: -1
global_train_batch_size: 256

seed: ${global_seed}
device_eval_batch_size: 16
device_train_microbatch_size: auto
precision: amp_bf16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: PURE
activation_checkpointing: false
activation_checkpointing_reentrant: false
activation_cpu_offload: false
limit_all_gathers: true
verbose: false

progress_bar: false
log_to_console: true
console_log_interval: 1ba

callbacks:
speed_monitor:
window_size: 10
lr_monitor: {}
memory_monitor: {}
runtime_estimator: {}

save_interval: 500ba
save_folder: ./{run_name}/checkpoints
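
The run name alludes to the Chinchilla heuristic of roughly 20 training tokens per parameter. A back-of-the-envelope check from the settings above (assuming every sequence is packed to the full max_seq_len, which the concat_tokens preprocessing step is intended to make approximately true):

# Token budget implied by the config above.
max_duration_batches = 4800     # max_duration: 4800ba
global_train_batch_size = 256   # global_train_batch_size: 256
max_seq_len = 2048              # max_seq_len: 2048
n_params = 125e6                # nominal size of this MPT-125M architecture

tokens = max_duration_batches * global_train_batch_size * max_seq_len
print(f"{tokens / 1e9:.2f}B tokens, ~{tokens / n_params:.0f} tokens per parameter")
# -> 2.52B tokens, ~20 tokens per parameter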