From 583dadc18584cc41cecb5456152a25054e6471ff Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Wed, 29 May 2024 18:27:05 +0200
Subject: [PATCH] build

---
 .github/workflows/self-nightly-caller.yml         | 29 +++++++++++++++++++
 .../workflows/self-nightly-past-ci-caller.yml     |  2 +-
 .github/workflows/self-scheduled.yml              | 15 ++++++----
 3 files changed, 39 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/self-nightly-caller.yml

diff --git a/.github/workflows/self-nightly-caller.yml b/.github/workflows/self-nightly-caller.yml
new file mode 100644
index 00000000000000..8c91e9d1bb5e4a
--- /dev/null
+++ b/.github/workflows/self-nightly-caller.yml
@@ -0,0 +1,29 @@
+name: Self-hosted runner (nightly-ci)
+
+
+on:
+  repository_dispatch:
+  schedule:
+    - cron: "17 2 * * *"
+  push:
+    branches:
+      - build-cleanup-docker-build
+
+jobs:
+  model-ci:
+    name: Model CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_models_gpu
+      slack_report_channel: "#transformers-ci-past-future"
+      runner: past-ci
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#transformers-ci-past-future"
+      runner: past-ci
+    secrets: inherit
diff --git a/.github/workflows/self-nightly-past-ci-caller.yml b/.github/workflows/self-nightly-past-ci-caller.yml
index 3f1cd0fa522f57..a8e48f5ab69c6f 100644
--- a/.github/workflows/self-nightly-past-ci-caller.yml
+++ b/.github/workflows/self-nightly-past-ci-caller.yml
@@ -9,7 +9,7 @@ on:
     branches:
       - run_nightly_ci*
       - run_past_ci*
-      - build-cleanup-docker-build
+      - build-cleanup-docker-build-temp
 
 jobs:
   # build_nightly_ci_images:
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 5911c81bf4f95d..ba79882786b606 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -15,6 +15,9 @@ on:
       slack_report_channel:
         required: true
         type: string
+      runner:
+        required: true
+        type: string
 
 env:
   HF_HOME: /mnt/cache
@@ -38,7 +41,7 @@ jobs:
     strategy:
       matrix:
         machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-all-latest-gpu
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -105,7 +108,7 @@ jobs:
       fail-fast: false
       matrix:
         machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-pytorch-gpu
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -155,7 +158,7 @@ jobs:
       fail-fast: false
      matrix:
         machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-tensorflow-gpu
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -206,7 +209,7 @@ jobs:
       fail-fast: false
       matrix:
         machine_type: [single-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-all-latest-gpu
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -257,7 +260,7 @@ jobs:
       fail-fast: false
       matrix:
         machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-pytorch-deepspeed-latest-gpu
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -319,7 +322,7 @@ jobs:
       matrix:
         folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
         machine_type: [single-gpu, multi-gpu]
-    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
+    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}']
     container:
       image: huggingface/transformers-quantization-latest-gpu
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/