-
Notifications
You must be signed in to change notification settings - Fork 87
183 lines (169 loc) · 9.18 KB
/
pippy_tests.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
name: PiPPy tests
on:
push:
branches:
- main
pull_request:
paths:
- '.github/workflows/pippy**'
- 'pippy/**'
- 'test/**'
- 'examples/**'
- '!docs/**'
- '!**.md'
- 'requirements.txt'
concurrency:
# Cancel CI on previous commit when a new commit is pushed to the same branch
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
# pytest_tests:
# runs-on: linux.4xlarge
# strategy:
# matrix:
# python-version: ["3.8", "3.9"]
# container:
# image: python:${{ matrix.python-version }}
# steps:
# - uses: actions/checkout@v2
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install flake8 pytest pytest-cov pytest-xdist numpy
# if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
# - name: Install pippy
# run: "python setup.py install"
# - name: Test with pytest
# run: |
# pytest --cov=pippy test/
# hf_model_tests:
# runs-on: linux.12xlarge
# strategy:
# matrix:
# python-version: ["3.9"]
# shard: ["0", "1", "2", "3", "4", "5", "6", "7"]
# container:
# image: python:${{ matrix.python-version }}
# steps:
# - uses: actions/checkout@v2
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install flake8 pytest pytest-cov pytest-xdist pytest-shard numpy
# if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
# - name: Install pavel's huggingface fork
# run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
# - name: Install pippy
# run: "python setup.py install"
# # Single thread to avoid OOM
# - name: Test forward only
# run: |
# pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'not HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py
# - name: Test forward and backward
# run: |
# pytest --shard-id=${{ matrix.shard }} --num-shards=8 -k 'HFModelsForwardBackwardTest' -sv --cov=pippy test/hf_test.py
integration_test_cpu:
runs-on: linux.4xlarge
strategy:
matrix:
python-version: ["3.9"]
schedule: ["FillDrain"]
env:
OMP_NUM_THREADS: "1"
container:
image: python:${{ matrix.python-version }}
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
- name: Install pippy
run: "python setup.py install"
- name: Test forward pipe generation
run: python test/test_pipe.py
- name: Test backward pipe generation
run: python test/test_pipe_bwd.py
- name: Run forward-only integration test
run: torchrun --nproc-per-node 4 test/test_fwd.py
- name: Run forward-loss-backward integration test
run: torchrun --nproc-per-node 4 test/test_bwd.py --schedule ${{ matrix.schedule }}
# - name: Run null_coalesce_accumulate integration test
# run: python test/local_test_null_coalesce_accumulate.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
# - name: Run PP + DDP test
# run: python test/local_test_ddp.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
#- name: Run HF BERT forward-only integration test
# run: python test/local_test_forward_hf_bert.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
# - name: Run HF GPT2 forward-only integration test
# run: python test/local_test_forward_hf_gpt2.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }} --checkpoint ${{ matrix.checkpoint }}
# - name: Run auto-split test
# run: python test/local_test_autosplit.py --replicate ${{ matrix.replicate }} --schedule ${{ matrix.schedule }}
# hf_examples_set1:
# runs-on: linux.12xlarge
# strategy:
# matrix:
# python-version: ["3.9"]
# schedule: ["FillDrain", "1F1B"]
# env:
# OMP_NUM_THREADS: "1"
# container:
# image: python:${{ matrix.python-version }}
# steps:
# - uses: actions/checkout@v2
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
# if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
# - name: Install pavel's huggingface fork
# run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
# - name: Install pippy
# run: "python setup.py install"
# - name: Test min-GPT
# run: |
# git config --global --add safe.directory /__w/tau/tau
# git submodule update --init test/minGPT
# python test/min_gpt_tracing.py
# - name: Run GPT2 example
# run: python examples/hf/gpt2/pippy_gpt2.py --schedule ${{ matrix.schedule }}
# - name: Run BERT example
# run: python examples/hf/bert/pippy_bert.py --schedule ${{ matrix.schedule }}
# - name: Run T5 example
# run: python examples/hf/t5/pippy_t5.py --schedule ${{ matrix.schedule }}
# - name: "HF Translation: fine-tune T5 model translation English to Romanian"
# run: >
# python examples/hf/translation/run_translation.py --model_name_or_path t5-small --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=1 --pp_group_size=8
# - name: "HF Translation: fine-tune BART model translation English to Romanian"
# run: >
# python examples/hf/translation/run_translation.py --model_name_or_path facebook/bart-base --do_train --source_lang en --target_lang ro --source_prefix "translate English to Romanian: " --dataset_name wmt16 --dataset_config_name ro-en --output_dir /tmp/tst-translation --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --overwrite_output_dir --predict_with_generate --max_steps=10 --dp_group_size=2 --pp_group_size=8
# hf_examples_set2:
# runs-on: linux.12xlarge
# strategy:
# matrix:
# python-version: ["3.9"]
# schedule: ["FillDrain", "1F1B"]
# env:
# OMP_NUM_THREADS: "1"
# container:
# image: python:${{ matrix.python-version }}
# steps:
# - uses: actions/checkout@v2
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install flake8 pytest pytest-cov numpy datasets evaluate scikit-learn sacrebleu
# if [ -f requirements.txt ]; then pip install -r requirements.txt --find-links https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html; fi
# - name: Install pavel's huggingface fork
# run: pip install git+https://github.com/huggingface/transformers.git@main sentencepiece six sacremoses
# - name: Install pippy
# run: "python setup.py install"
# - name: "HF Causal Language Modeling: fine-tune GPT-2 on WikiText-2"
# run: python examples/hf/language-modeling/run_clm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-clm --max_steps=3 --overwrite_output_dir
# - name: "HF Masked Language Modeling: fine-tune RoBERTa on WikiText-2"
# run: python examples/hf/language-modeling/run_mlm.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path roberta-base --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --do_train --do_eval --output_dir /tmp/test-mlm --max_steps=3 --overwrite_output_dir
# - name: "HF Text classification: fine-tune BERT on the GLUE benchmark"
# run: python examples/hf/text-classification/run_glue.py --dp_group_size=2 --pp_group_size=8 --model_name_or_path bert-base-cased --task_name mrpc --do_train --do_eval --max_seq_length 128 --per_device_train_batch_size 32 --learning_rate 2e-5 --num_train_epochs 3 --output_dir /tmp/mrpc/ --max_steps=3 --overwrite_output_dir
# TODO:
# Update GPU test to use template in:
# https://github.com/pytorch/test-infra/wiki/Writing-generic-CI-jobs