From a7fd253c80e1b71c97c1998e336bfee91cbe62ac Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Tue, 14 May 2024 11:10:44 -0400 Subject: [PATCH] Propagate: need to revert before merging --- .github/workflows/self-scheduled-caller.yml | 40 +-------------------- .github/workflows/self-scheduled.yml | 4 +-- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index 40689c629a09bf..5b79b1e5cf6fb3 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -8,40 +8,9 @@ on: push: branches: - run_scheduled_ci* + - muellerzr-ds-investigation jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-models" - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-torch" - secrets: inherit - - tf-pipeline: - name: TF pipeline CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_pipelines_tf_gpu - slack_report_channel: "#transformers-ci-daily-pipeline-tf" - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-examples" - secrets: inherit - deepspeed-ci: name: DeepSpeed CI uses: ./.github/workflows/self-scheduled.yml @@ -50,10 +19,3 @@ jobs: slack_report_channel: "#transformers-ci-daily-deepspeed" secrets: inherit - quantization-ci: - name: Quantization CI - uses: ./.github/workflows/self-scheduled.yml - with: - job: run_quantization_torch_gpu - slack_report_channel: "#transformers-ci-daily-quantization" - secrets: inherit diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 5911c81bf4f95d..13df708200acc2 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -259,7 +259,7 @@ jobs: machine_type: [single-gpu, multi-gpu] runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] container: - image: huggingface/transformers-pytorch-deepspeed-latest-gpu + image: huggingface/transformers-pytorch-deepspeed-latest-gpu-test options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - name: Update clone @@ -278,7 +278,7 @@ jobs: working-directory: /workspace run: | python3 -m pip uninstall -y deepspeed - DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - name: NVIDIA-SMI run: |