Merge branch 'main' into add_openai_wrapper
bmosaicml authored Sep 13, 2023
2 parents 79ca900 + e75cfc9 commit f613b3b
Showing 55 changed files with 1,623 additions and 509 deletions.
.github/workflows/regression_yamls/eval-7b-composer.yaml (46 additions, 0 deletions)
@@ -0,0 +1,46 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts/
composer eval/eval.py /mnt/config/parameters.yaml
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-7b-composer-eval-regression

compute:
gpus: 8

parameters:
run_name: mpt-7b-composer-eval-regression
seed: 1
max_seq_len: 1024

models:
-
model_name: mosaicml/mpt-7b
model:
name: hf_causal_lm
pretrained_model_name_or_path: mosaicml/mpt-7b
init_device: cpu
pretrained: true
tokenizer:
name: mosaicml/mpt-7b
kwargs:
model_max_length: ${max_seq_len}
load_path: 'FILL IN'

device_eval_batch_size: 4
precision: amp_fp16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: FULL
forward_prefetch: True
limit_all_gathers: True

icl_subset_num_batches: 20
icl_tasks: 'eval/yamls/tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
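
Note: the ${max_seq_len} reference inside the tokenizer kwargs is resolved against the top-level max_seq_len parameter when the config is loaded. A minimal sketch of that behavior, assuming OmegaConf-style interpolation (the loading path llm-foundry's scripts rely on):

# Minimal sketch (assumption: the YAML is loaded with OmegaConf, whose
# ${...} interpolation resolves references against top-level keys).
from omegaconf import OmegaConf

cfg = OmegaConf.create("""
max_seq_len: 1024
tokenizer:
  name: mosaicml/mpt-7b
  kwargs:
    model_max_length: ${max_seq_len}
""")
# Interpolation resolves on access, so model_max_length comes back as 1024.
assert cfg.tokenizer.kwargs.model_max_length == 1024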
.github/workflows/regression_yamls/eval-7b-hf.yaml (45 additions, 0 deletions)
@@ -0,0 +1,45 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts/
composer eval/eval.py /mnt/config/parameters.yaml
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-7b-hf-eval-regression

compute:
gpus: 8

parameters:
run_name: mpt-7b-hf-eval-regression
seed: 1
max_seq_len: 1024

models:
-
model_name: mosaicml/mpt-7b
model:
name: hf_causal_lm
pretrained_model_name_or_path: mosaicml/mpt-7b
init_device: cpu
pretrained: true
tokenizer:
name: mosaicml/mpt-7b
kwargs:
model_max_length: ${max_seq_len}

device_eval_batch_size: 4
precision: amp_fp16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: FULL
forward_prefetch: True
limit_all_gathers: True

icl_subset_num_batches: 20
icl_tasks: 'eval/yamls/tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
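
For orientation, the model/tokenizer section above corresponds roughly to loading the checkpoint directly from the Hugging Face Hub. A hedged sketch in plain transformers (an illustration only, not the Composer model wrapper that eval.py actually builds from this config):

# Rough equivalent of the hf_causal_lm + tokenizer entries above.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "mosaicml/mpt-7b",
    model_max_length=1024,  # mirrors kwargs.model_max_length: ${max_seq_len}
)
model = AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b",
    trust_remote_code=True,  # MPT ships custom modeling code on the Hub
)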
.github/workflows/regression_yamls/llama2-finetune.yaml (121 additions, 0 deletions)
@@ -0,0 +1,121 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts
composer train/train.py /mnt/config/parameters.yaml
python inference/convert_composer_to_hf.py \
--composer_path ./llama2-finetune-regression/checkpoints/latest-rank0.pt \
--hf_output_path ./hf-checkpoints/
image: mosaicml/llm-foundry:1.13.1_cu117-latest
name: llama2-finetune-regression

compute:
gpus: 8

parameters:
tokenizer_name: meta-llama/Llama-2-13b-hf
max_seq_len: 4096
global_seed: 17

run_name: llama2-finetune-regression
max_split_size_mb: 512

model:
name: hf_causal_lm
init_device: mixed
pretrained_model_name_or_path: meta-llama/Llama-2-13b-hf
pretrained: true
use_auth_token: true
attention_patch_type: triton

tokenizer:
name: ${tokenizer_name}
kwargs:
model_max_length: ${max_seq_len}

train_loader:
name: finetuning
dataset:
hf_name: mosaicml/dolly_hhrlhf
split: train
max_seq_len: ${max_seq_len}
allow_pad_trimming: false
decoder_only_format: true
shuffle: true
drop_last: true
num_workers: 8
pin_memory: false
prefetch_factor: 2
persistent_workers: true
timeout: 0

eval_loader:
name: finetuning
dataset:
hf_name: mosaicml/dolly_hhrlhf
split: test
max_seq_len: ${max_seq_len}
allow_pad_trimming: false
decoder_only_format: true
shuffle: false
drop_last: true
num_workers: 8
pin_memory: false
prefetch_factor: 2
persistent_workers: true
timeout: 0

scheduler:
name: cosine_with_warmup
t_warmup: 100ba
alpha_f: 0.1

optimizer:
name: decoupled_lionw
lr: 5.0e-7
betas:
- 0.9
- 0.95
weight_decay: 0.0

algorithms:
gradient_clipping:
clipping_type: norm
clipping_threshold: 1.0

max_duration: 50ba
eval_first: false
eval_interval: 1ep
eval_subset_num_batches: -1
global_train_batch_size: 64

seed: ${global_seed}
device_eval_batch_size: 8
device_train_microbatch_size: 8
precision: amp_bf16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: PURE
activation_checkpointing: true
activation_checkpointing_reentrant: false
activation_cpu_offload: false
limit_all_gathers: true
verbose: false

progress_bar: false
log_to_console: true
console_log_interval: 1ba

callbacks:
speed_monitor:
window_size: 10
lr_monitor: {}
memory_monitor: {}
runtime_estimator: {}

save_folder: ./{run_name}/checkpoints
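
The train and eval loaders above both read mosaicml/dolly_hhrlhf straight from the Hugging Face Hub. A quick way to inspect what the finetuning loader will see (a sketch assuming the datasets package; the printed column names are whatever the Hub copy actually defines):

# Peek at the finetuning data referenced by hf_name above.
from datasets import load_dataset

ds = load_dataset("mosaicml/dolly_hhrlhf", split="train")
print(ds.column_names)  # expected to be prompt/response-style fields
print(ds[0])            # one raw example, before any tokenization or packing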
.github/workflows/regression_yamls/mpt-125m-chinchilla.yaml (122 additions, 0 deletions)
@@ -0,0 +1,122 @@
integrations:
- integration_type: git_repo
git_repo: mosaicml/llm-foundry
git_branch: main
pip_install: -e .[gpu]

command: |
cd llm-foundry/scripts
python data_prep/convert_dataset_hf.py \
--dataset c4 --data_subset en \
--out_root ./my-copy-c4 --splits train val \
--concat_tokens 2048 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
composer train/train.py /mnt/config/parameters.yaml
python inference/convert_composer_to_hf.py \
--composer_path ./mpt-125m-chinchilla-regression/checkpoints/latest-rank0.pt \
--hf_output_path ./hf-checkpoints/
image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
name: mpt-125m-chinchilla-regression

compute:
gpus: 8

parameters:
run_name: mpt-125m-chinchilla-regression
data_local: ./my-copy-c4
data_remote:
max_seq_len: 2048
global_seed: 17

model:
name: mpt_causal_lm
init_device: meta
d_model: 768
n_heads: 12
n_layers: 12
expansion_ratio: 4
max_seq_len: ${max_seq_len}
vocab_size: 50368
attn_config:
attn_impl: triton

tokenizer:
name: EleutherAI/gpt-neox-20b
kwargs:
model_max_length: ${max_seq_len}

train_loader:
name: text
dataset:
local: ${data_local}
remote: ${data_remote}
split: train
shuffle: true
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
drop_last: true
num_workers: 8

eval_loader:
name: text
dataset:
local: ${data_local}
remote: ${data_remote}
split: val
shuffle: false
max_seq_len: ${max_seq_len}
shuffle_seed: ${global_seed}
drop_last: false
num_workers: 8

scheduler:
name: cosine_with_warmup
t_warmup: 100ba
alpha_f: 0.1

optimizer:
name: decoupled_adamw
lr: 6.0e-4
betas:
- 0.9
- 0.95
eps: 1.0e-08
weight_decay: 0.0

algorithms:
gradient_clipping:
clipping_type: norm
clipping_threshold: 1.0

max_duration: 4800ba
eval_interval: 500ba
eval_first: false
eval_subset_num_batches: -1
global_train_batch_size: 256

seed: ${global_seed}
device_eval_batch_size: 16
device_train_microbatch_size: auto
precision: amp_bf16

fsdp_config:
sharding_strategy: FULL_SHARD
mixed_precision: PURE
activation_checkpointing: false
activation_checkpointing_reentrant: false
activation_cpu_offload: false
limit_all_gathers: true
verbose: false

progress_bar: false
log_to_console: true
console_log_interval: 1ba

callbacks:
speed_monitor:
window_size: 10
lr_monitor: {}
memory_monitor: {}
runtime_estimator: {}

save_interval: 500ba
save_folder: ./{run_name}/checkpoints
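
The run name alludes to the Chinchilla heuristic of roughly 20 training tokens per parameter. A back-of-the-envelope check from the settings above (assuming every sequence is packed to the full max_seq_len, which the concat_tokens preprocessing step is intended to make approximately true):

# Token budget implied by the config above.
max_duration_batches = 4800     # max_duration: 4800ba
global_train_batch_size = 256   # global_train_batch_size: 256
max_seq_len = 2048              # max_seq_len: 2048
n_params = 125e6                # nominal size of this MPT-125M architecture

tokens = max_duration_batches * global_train_batch_size * max_seq_len
print(f"{tokens / 1e9:.2f}B tokens, ~{tokens / n_params:.0f} tokens per parameter")
# -> 2.52B tokens, ~20 tokens per parameter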