# NOTE(review): the next three lines are GitHub web-view residue captured with this file, not workflow content.
# Add pipeline schedules script #1099
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# PiPPy CI workflow: runs on pushes to main and on pull requests that touch
# the paths listed under `pull_request.paths`.
name: PiPPy tests

on:
  push:
    branches:
      - main
  pull_request:
    # Entries starting with '!' are exclusions (docs and Markdown files).
    paths:
      - '.github/workflows/pippy**'
      - 'pippy/**'
      - 'test/**'
      - 'examples/**'
      - '!docs/**'
      - '!**.md'
      - 'requirements.txt'

concurrency:
  # Cancel CI on previous commit when a new commit is pushed to the same branch
  # The group key is the workflow name plus the PR number, falling back to the
  # git ref when the event has no pull request.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Disabled job: pytest unit tests on Python 3.8 / 3.9.
  # pytest_tests:
  #   runs-on: linux.4xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.8", "3.9"]
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov pytest-xdist numpy
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     - name: Test with pytest
  #       run: |
  #         pytest --cov=pippy test/

  # Disabled job: HuggingFace model tests, split across 8 pytest shards.
  # hf_model_tests:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       shard: ["0", "1", "2", "3", "4", "5", "6", "7"]
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov pytest-xdist pytest-shard numpy
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     # Single thread to avoid OOM
  #     - name: Test forward only
  #       run: |
  #         pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'not HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py
  #     - name: Test forward and backward
  #       run: |
  #         pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py

  # Active job: CPU integration tests. Installs dependencies and PiPPy inside
  # a python:<version> container, runs the pipe-generation scripts, then the
  # forward and forward-loss-backward tests under torchrun with 4 processes.
  integration_test_cpu:
    runs-on: linux.4xlarge
    strategy:
      matrix:
        # Quoted so the version stays a string rather than a float.
        python-version: ["3.9"]
        schedule: ["FillDrain"]
    env:
      # NOTE(review): presumably limits each process to one OpenMP thread to
      # avoid CPU oversubscription across the torchrun workers -- confirm.
      OMP_NUM_THREADS: "1"
    container:
      image: python:${{ matrix.python-version }}
    steps:
      # Bumped from v2: use the current major version of the checkout action.
      - uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
          if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
      - name: Install pippy
        run: "python setup.py install"
      - name: Test forward pipe generation
        run: python test/test_pipe.py
      - name: Test backward pipe generation
        run: python test/test_pipe_bwd.py
      - name: Run forward-only integration test
        run: torchrun --nproc-per-node 4 test/test_fwd.py
      - name: Run forward-loss-backward integration test
        run: torchrun --nproc-per-node 4 test/test_bwd.py --schedule ${{ matrix.schedule }}
      # Disabled steps below.
      # NOTE(review): they reference matrix.replicate and matrix.checkpoint,
      # which this job's matrix does not define; add those axes (or drop the
      # flags) before re-enabling.
      # - name: Run null_coalesce_accumulate integration test
      #   run: python test/local_test_null_coalesce_accumulate.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
      # - name: Run PP + DDP test
      #   run: python test/local_test_ddp.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      # - name: Run HF BERT forward-only integration test
      #   run: python test/local_test_forward_hf_bert.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      # - name: Run HF GPT2 forward-only integration test
      #   run: python test/local_test_forward_hf_gpt2.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
      # - name: Run auto-split test
      #   run: python test/local_test_autosplit.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
  # hf_examples_set1:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       schedule: ["FillDrain", "1F1B"]
  #   env:
  #     OMP_NUM_THREADS: "1"
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     - name: Test min-GPT
  #       run: |
  #         git config --global --add safe.directory /__w/tau/tau
  #         git submodule update --init test/minGPT
  #         python test/min_gpt_tracing.py
  #     - name: Run GPT2 example
  #       run: python examples/hf/gpt2/pippy_gpt2.py --schedule ${{ matrix.schedule }}
  #     - name: Run BERT example
  #       run: python examples/hf/bert/pippy_bert.py --schedule ${{ matrix.schedule }}
  #     - name: Run T5 example
  #       run: python examples/hf/t5/pippy_t5.py --schedule ${{ matrix.schedule }}
  #     - name: "HF Translation: fine-tune T5 model translation English to Romanian"
  #       run: >
  #         python examples/hf/translation/run_translation.py --model_name_or_path t5-small --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=1 --pp_group_size=8
  #     - name: "HF Translation: fine-tune BART model translation English to Romanian"
  #       run: >
  #         python examples/hf/translation/run_translation.py --model_name_or_path facebook/bart-base --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=2 --pp_group_size=8
  # hf_examples_set2:
  #   runs-on: linux.12xlarge
  #   strategy:
  #     matrix:
  #       python-version: ["3.9"]
  #       schedule: ["FillDrain", "1F1B"]
  #   env:
  #     OMP_NUM_THREADS: "1"
  #   container:
  #     image: python:${{ matrix.python-version }}
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
  #         if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
  #     - name: Install pavel's huggingface fork
  #       run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
  #     - name: Install pippy
  #       run: "python setup.py install"
  #     - name: "HF Causal Language Modeling: fine-tune GPT-2 on WikiText-2"
  #       run: python examples/hf/language-modeling/run_clm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-clm --max_steps=3 --overwrite_output_dir
  #     - name: "HF Masked Language Modeling: fine-tune RoBERTa on WikiText-2"
  #       run: python examples/hf/language-modeling/run_mlm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path roberta-base --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-mlm --max_steps=3 --overwrite_output_dir
  #     - name: "HF Text classification: fine-tune BERT on the GLUE benchmark"
  #       run: python examples/hf/text-classification/run_glue.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path bert-base-cased --task_name mrpc --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --num_train_epochs 3 --output_dir /tmp/mrpc/ --max_steps=3 --overwrite_output_dir
  # TODO:
  # Update GPU test to use template in:
  # https://github.com/pytorch/test-infra/wiki/Writing-generic-CI-jobs