Skip to content

Commit

Permalink
Add torch/tf pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
ydshieh committed Apr 2, 2024
1 parent 1a64202 commit 4f9a39a
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 105 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/self-scheduled-caller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,24 @@ jobs:
env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS
secrets: inherit

# Calls the reusable `self-scheduled.yml` workflow and selects its `run_pipelines_torch_gpu` job.
torch-pipeline:
  name: Torch pipeline CI
  uses: ./.github/workflows/self-scheduled.yml
  # Make the caller's secrets available to the called workflow.
  secrets: inherit
  with:
    # See the comment for `ENV_NAME_FOR_CI_SLACK_REPORT_CHANNEL_ID` in `.github/workflows/slack-report.yml`.
    # NOTE(review): same `CI_SLACK_CHANNEL_DUMMY_TESTS` value as the job above - confirm this is the intended channel.
    env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS
    # Must match the `inputs.job` value checked by the job's `if:` in `self-scheduled.yml`.
    job: run_pipelines_torch_gpu

# Calls the reusable `self-scheduled.yml` workflow and selects its `run_pipelines_tf_gpu` job.
tf-pipeline:
  name: TF pipeline CI
  uses: ./.github/workflows/self-scheduled.yml
  # Make the caller's secrets available to the called workflow.
  secrets: inherit
  with:
    # See the comment for `ENV_NAME_FOR_CI_SLACK_REPORT_CHANNEL_ID` in `.github/workflows/slack-report.yml`.
    # NOTE(review): same `CI_SLACK_CHANNEL_DUMMY_TESTS` value as the neighbouring jobs - confirm this is the intended channel.
    env_name_for_slack_report_channel: CI_SLACK_CHANNEL_DUMMY_TESTS
    # Must match the `inputs.job` value checked by the job's `if:` in `self-scheduled.yml`.
    job: run_pipelines_tf_gpu

example-ci:
name: Example CI
uses: ./.github/workflows/self-scheduled.yml
Expand Down
206 changes: 104 additions & 102 deletions .github/workflows/self-scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,109 @@ jobs:
slice_id: ${{ matrix.slice_id }}
secrets: inherit

# PyTorch pipeline tests. Runs only when the workflow is called with `job: run_pipelines_torch_gpu`.
run_pipelines_torch_gpu:
  name: PyTorch pipelines
  if: ${{ inputs.job == 'run_pipelines_torch_gpu' }}
  needs: setup
  strategy:
    # Let the other machine type finish even if one matrix entry fails.
    fail-fast: false
    matrix:
      machine_type:
        - single-gpu
        - multi-gpu
  runs-on:
    - '${{ matrix.machine_type }}'
    - nvidia-gpu
    - t4
    - daily-ci
  container:
    image: huggingface/transformers-pytorch-gpu
    # Bind-mounts the host path /mnt/cache/.cache/huggingface at /mnt/cache/ inside the container.
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  steps:
    # Move the /transformers checkout to the commit that triggered this run.
    - name: Update clone
      working-directory: /transformers
      run: |
        git fetch && git checkout ${{ github.sha }}

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: |
        python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: nvidia-smi

    - name: Environment
      working-directory: /transformers
      run: python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: |
        pip freeze

    # Reports are written under /transformers/reports/<machine_type>_tests_torch_pipeline_gpu
    # (the directory read and uploaded by the two steps below).
    - name: Run all pipeline tests on GPU
      working-directory: /transformers
      run: python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines

    # Diagnostic only: printed when an earlier step failed, and never allowed to fail the job itself.
    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: |
        cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt

    # Upload the report directory whether or not the tests passed.
    - name: 'Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu'
      if: ${{ always() }}
      uses: actions/upload-artifact@v3
      with:
        name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
        path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu

# TensorFlow pipeline tests. Runs only when the workflow is called with `job: run_pipelines_tf_gpu`.
run_pipelines_tf_gpu:
  if: ${{ inputs.job == 'run_pipelines_tf_gpu' }}
  name: TensorFlow pipelines
  strategy:
    # Let the other machine type finish even if one matrix entry fails.
    fail-fast: false
    matrix:
      machine_type: [single-gpu, multi-gpu]
  runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
  container:
    image: huggingface/transformers-tensorflow-gpu
    # Bind-mounts the host path /mnt/cache/.cache/huggingface at /mnt/cache/ inside the container.
    options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
  needs: setup
  steps:
    # Move the /transformers checkout to the commit that triggered this run.
    - name: Update clone
      working-directory: /transformers
      run: |
        git fetch && git checkout ${{ github.sha }}

    - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
      working-directory: /transformers
      run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

    - name: NVIDIA-SMI
      run: |
        nvidia-smi

    - name: Environment
      working-directory: /transformers
      run: |
        python3 utils/print_env.py

    - name: Show installed libraries and their versions
      working-directory: /transformers
      run: pip freeze

    # Reports are written under /transformers/reports/<machine_type>_tests_tf_pipeline_gpu
    # (the directory read and uploaded by the two steps below).
    - name: Run all pipeline tests on GPU
      working-directory: /transformers
      run: |
        python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines

    # Diagnostic only. Aligned with the PyTorch pipeline job: print the short failure report
    # only when an earlier step failed, and do not let a missing report file fail the job
    # (previously `if: always()` with no `continue-on-error`).
    - name: Failure short reports
      if: ${{ failure() }}
      continue-on-error: true
      run: |
        cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt

    # Upload the report directory whether or not the tests passed.
    - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
      if: ${{ always() }}
      uses: actions/upload-artifact@v3
      with:
        name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
        path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu

run_examples_gpu:
if: ${{ inputs.job == 'run_examples_gpu' }}
name: Examples directory
Expand Down Expand Up @@ -140,107 +243,6 @@ jobs:
name: ${{ matrix.machine_type }}_run_examples_gpu
path: /transformers/reports/${{ matrix.machine_type }}_examples_gpu

# run_pipelines_torch_gpu:
# name: PyTorch pipelines
# strategy:
# fail-fast: false
# matrix:
# machine_type: [single-gpu, multi-gpu]
# runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
# container:
# image: huggingface/transformers-pytorch-gpu
# options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
# needs: setup
# steps:
# - name: Update clone
# working-directory: /transformers
# run: git fetch && git checkout ${{ github.sha }}
#
# - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
# working-directory: /transformers
# run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
#
# - name: NVIDIA-SMI
# run: |
# nvidia-smi
#
# - name: Environment
# working-directory: /transformers
# run: |
# python3 utils/print_env.py
#
# - name: Show installed libraries and their versions
# working-directory: /transformers
# run: pip freeze
#
# - name: Run all pipeline tests on GPU
# working-directory: /transformers
# run: |
# python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests/pipelines
#
# - name: Failure short reports
# if: ${{ failure() }}
# continue-on-error: true
# run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
#
# - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
# if: ${{ always() }}
# uses: actions/upload-artifact@v3
# with:
# name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
# path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
#
# run_pipelines_tf_gpu:
# name: TensorFlow pipelines
# strategy:
# fail-fast: false
# matrix:
# machine_type: [single-gpu, multi-gpu]
# runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
# container:
# image: huggingface/transformers-tensorflow-gpu
# options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
# needs: setup
# steps:
# - name: Update clone
# working-directory: /transformers
# run: |
# git fetch && git checkout ${{ github.sha }}
#
# - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
# working-directory: /transformers
# run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .
#
# - name: NVIDIA-SMI
# run: |
# nvidia-smi
#
# - name: Environment
# working-directory: /transformers
# run: |
# python3 utils/print_env.py
#
# - name: Show installed libraries and their versions
# working-directory: /transformers
# run: pip freeze
#
# - name: Run all pipeline tests on GPU
# working-directory: /transformers
# run: |
# python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests/pipelines
#
# - name: Failure short reports
# if: ${{ always() }}
# run: |
# cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
#
# - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
# if: ${{ always() }}
# uses: actions/upload-artifact@v3
# with:
# name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
# path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
#
run_all_tests_torch_cuda_extensions_gpu:
if: ${{ inputs.job == 'run_all_tests_torch_cuda_extensions_gpu' }}
name: Torch CUDA extension tests
Expand Down Expand Up @@ -394,7 +396,7 @@ jobs:

send_results:
name: Slack Report
needs: [setup, run_tests_gpu, run_examples_gpu, run_all_tests_torch_cuda_extensions_gpu, run_tests_quantization_torch_gpu, run_extract_warnings]
needs: [setup, run_tests_gpu, run_pipelines_torch_gpu, run_pipelines_tf_gpu, run_examples_gpu, run_all_tests_torch_cuda_extensions_gpu, run_tests_quantization_torch_gpu, run_extract_warnings]
if: ${{ always() }}
uses: ./.github/workflows/slack-report.yml
with:
Expand Down
6 changes: 3 additions & 3 deletions utils/notification_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,9 +1058,9 @@ def prepare_reports(title, header, reports, to_truncate=True):

# Additional runs
additional_files = {
"Examples directory": "run_examples_gpu",
"PyTorch pipelines": "run_tests_torch_pipeline_gpu",
"TensorFlow pipelines": "run_tests_tf_pipeline_gpu",
"Examples directory": "run_examples_gpu",
"Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
"Quantization tests": "run_tests_quantization_torch_gpu",
}
Expand All @@ -1079,9 +1079,9 @@ def prepare_reports(title, header, reports, to_truncate=True):
# `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
# specific job. See below.
job_to_test_map = {
"run_pipelines_torch_gpu": "PyTorch pipelines",
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
# "": "PyTorch pipelines",
# "": "TensorFlow pipelines",
"run_all_tests_torch_cuda_extensions_gpu": "Torch CUDA extension tests",
"run_tests_quantization_torch_gpu": "Quantization tests",
}
Expand Down

0 comments on commit 4f9a39a

Please sign in to comment.