From bd06bdbee95aee3b43c1d1c4a2be2567cef32abf Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Thu, 7 Dec 2023 23:28:55 +0000 Subject: [PATCH 1/4] update cis --- .github/workflows/test_exporters_gpu.yml | 64 +------------------- .github/workflows/test_onnxruntime_gpu.yml | 64 +------------------- .github/workflows/test_onnxruntime_train.yml | 64 +------------------- 3 files changed, 3 insertions(+), 189 deletions(-) diff --git a/.github/workflows/test_exporters_gpu.yml b/.github/workflows/test_exporters_gpu.yml index 36b6179bd41..3ae9129f71c 100644 --- a/.github/workflows/test_exporters_gpu.yml +++ b/.github/workflows/test_exporters_gpu.yml @@ -12,47 +12,9 @@ on: # - main jobs: - start-runner: - if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [self-hosted, ci] # run the job on the newly created runner env: AWS_REGION: us-east-1 steps: @@ -64,27 +26,3 @@ jobs: - name: Test with unittest within docker container run: | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests exporters-gpu:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() && !(needs.start-runner.result == 'skipped' && needs.do-the-job.result == 'skipped') }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/test_onnxruntime_gpu.yml b/.github/workflows/test_onnxruntime_gpu.yml index 838c9379bdd..00e26794eae 100644 --- a/.github/workflows/test_onnxruntime_gpu.yml +++ b/.github/workflows/test_onnxruntime_gpu.yml @@ -12,47 +12,9 @@ on: # - main jobs: - start-runner: - if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [self-hosted, ci] # run the job on the newly created runner env: AWS_REGION: us-east-1 steps: @@ -64,27 +26,3 @@ jobs: - name: Test with unittest within docker container run: | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() && !(needs.start-runner.result == 'skipped' && needs.do-the-job.result == 'skipped') }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/test_onnxruntime_train.yml b/.github/workflows/test_onnxruntime_train.yml index 4b70a56b15d..441bd47c6e7 100644 --- a/.github/workflows/test_onnxruntime_train.yml +++ b/.github/workflows/test_onnxruntime_train.yml @@ -8,47 +8,9 @@ on: types: [opened, synchronize, reopened, labeled] jobs: - start-runner: - if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'training')}} - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: name: Run ORTTrainer test - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [self-hosted, ci] # run the job on the newly created runner env: AWS_REGION: us-east-1 steps: @@ -60,27 +22,3 @@ jobs: - name: Run test within docker container run: | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime/train:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() && !(needs.start-runner.result == 'skipped' && needs.do-the-job.result == 'skipped') }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} From ff50c689d6706a805f9c56d47ca00ae34f11b3d4 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Fri, 8 Dec 2023 11:35:34 +0100 Subject: [PATCH 2/4] move to new ci --- .github/workflows/test_onnxruntime_gpu.yml | 66 +--------------------- 1 file changed, 2 insertions(+), 64 deletions(-) diff --git a/.github/workflows/test_onnxruntime_gpu.yml b/.github/workflows/test_onnxruntime_gpu.yml index 838c9379bdd..75e3a8dc29d 100644 --- a/.github/workflows/test_onnxruntime_gpu.yml +++ b/.github/workflows/test_onnxruntime_gpu.yml @@ -12,47 +12,9 @@ on: # - main jobs: - start-runner: - if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -63,28 +25,4 @@ jobs: docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu . - name: Test with unittest within docker container run: | - docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() && !(needs.start-runner.result == 'skipped' && needs.do-the-job.result == 'skipped') }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} + docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest From 2750458577ac2840305fa42843939644b6fd47dc Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Fri, 8 Dec 2023 14:42:48 +0000 Subject: [PATCH 3/4] change for all self-hosted cis --- .github/workflows/doctests.yml | 65 +------------------ .../workflows/test_bettertransformer_gpu.yml | 65 +------------------ .github/workflows/test_exporters_gpu.yml | 7 +- .github/workflows/test_gptq.yml | 65 +------------------ .github/workflows/test_onnxruntime_gpu.yml | 3 +- .github/workflows/test_onnxruntime_train.yml | 5 +- 6 files changed, 15 insertions(+), 195 deletions(-) diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml index beb4c269073..d2879157b16 100644 --- a/.github/workflows/doctests.yml +++ b/.github/workflows/doctests.yml @@ -7,46 +7,9 @@ on: - cron: 0 1 * * 0 # every sunday at 1am jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: - name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + name: Start self-hosted EC2 runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -58,27 +21,3 @@ jobs: - name: Test with unittest within docker container run: | docker run --rm --gpus all --workdir=/workspace/optimum/ onnxruntime-gpu:latest /bin/bash tests/run_doctest.sh - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/test_bettertransformer_gpu.yml b/.github/workflows/test_bettertransformer_gpu.yml index 88236394c19..78a2995a118 100644 --- a/.github/workflows/test_bettertransformer_gpu.yml +++ b/.github/workflows/test_bettertransformer_gpu.yml @@ -6,46 +6,9 @@ on: - cron: 0 1 */3 * * # at 1am every 3 days jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] do-the-job: - name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + name: Start self-hosted EC2 runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -57,27 +20,3 @@ jobs: - name: Test with unittest within docker container run: | docker run --rm --gpus all --workdir=/workspace/optimum/tests bettertransformer-gpu:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/test_exporters_gpu.yml b/.github/workflows/test_exporters_gpu.yml index 3ae9129f71c..e51d49a7c9f 100644 --- a/.github/workflows/test_exporters_gpu.yml +++ b/.github/workflows/test_exporters_gpu.yml @@ -13,8 +13,9 @@ on: jobs: do-the-job: - name: Setup - runs-on: [self-hosted, ci] # run the job on the newly created runner + if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} + name: Start self-hosted EC2 runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -25,4 +26,4 @@ jobs: docker build -f tests/exporters/Dockerfile_exporters_gpu -t exporters-gpu . - name: Test with unittest within docker container run: | - docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests exporters-gpu:latest + docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests exporters-gpu:latest diff --git a/.github/workflows/test_gptq.yml b/.github/workflows/test_gptq.yml index dcb7fb5565a..075c5c2db29 100644 --- a/.github/workflows/test_gptq.yml +++ b/.github/workflows/test_gptq.yml @@ -12,47 +12,10 @@ on: # - main jobs: - start-runner: + do-the-job: if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] - do-the-job: - name: Setup - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -64,27 +27,3 @@ jobs: - name: Test with unittest within docker container run: | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - do-the-job # required to wait when the main job is done - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() && !(needs.start-runner.result == 'skipped' && needs.do-the-job.result == 'skipped') }} # required to stop the runner even if the error happened in the previous jobs are all skipped - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/.github/workflows/test_onnxruntime_gpu.yml b/.github/workflows/test_onnxruntime_gpu.yml index 75e3a8dc29d..b73369ec8b2 100644 --- a/.github/workflows/test_onnxruntime_gpu.yml +++ b/.github/workflows/test_onnxruntime_gpu.yml @@ -13,7 +13,8 @@ on: jobs: do-the-job: - name: Setup + if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} + name: Start self-hosted EC2 runner runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 diff --git a/.github/workflows/test_onnxruntime_train.yml b/.github/workflows/test_onnxruntime_train.yml index 441bd47c6e7..7967f1b036b 100644 --- a/.github/workflows/test_onnxruntime_train.yml +++ b/.github/workflows/test_onnxruntime_train.yml @@ -9,8 +9,9 @@ on: jobs: do-the-job: + if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'training')}} name: Run ORTTrainer test - runs-on: [self-hosted, ci] # run the job on the newly created runner + runs-on: [single-gpu, nvidia-gpu, t4, ci] env: AWS_REGION: us-east-1 steps: @@ -21,4 +22,4 @@ jobs: docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer -t onnxruntime/train . - name: Run test within docker container run: | - docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime/train:latest + docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime/train:latest \ No newline at end of file From 63717a157790dc7420eaab075cd8f6050da17c8e Mon Sep 17 00:00:00 2001 From: JingyaHuang Date: Fri, 8 Dec 2023 14:48:39 +0000 Subject: [PATCH 4/4] fix path for gptq --- .github/workflows/test_gptq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_gptq.yml b/.github/workflows/test_gptq.yml index 075c5c2db29..0f3c31c6d2c 100644 --- a/.github/workflows/test_gptq.yml +++ b/.github/workflows/test_gptq.yml @@ -23,7 +23,7 @@ jobs: uses: actions/checkout@v2 - name: Build image run: | - docker build -f tests/gptq/docker/Dockerfile_quantization_gpu -t gptq-gpu . + docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu . - name: Test with unittest within docker container run: | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest