Skip to content

Pin checkpoint state dict flattening patch (#3700) #1012

Pin checkpoint state dict flattening patch (#3700)

Pin checkpoint state dict flattening patch (#3700) #1012

Workflow file for this run

name: Daily
on:
schedule:
- cron: "30 2 * * *" # 2:30 every day
push:
branches:
- main
- release/**
workflow_dispatch:
# Cancel old runs when a new commit is pushed to the same branch if not on main or dev
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }}
jobs:
daily-pytest-cpu:
name: ${{ matrix.name }}
runs-on: ubuntu-latest
container: ${{ matrix.container }}
if: github.repository_owner == 'mosaicml'
strategy:
matrix:
include:
- name: cpu-3.11-2.3
container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
markers: not daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: cpu-3.11-2.4
container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
markers: not daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: cpu-3.11-2.5
container: mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
markers: not daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: cpu-3.11-2.5-composer
container: mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
markers: not daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: composer
- name: cpu-doctest
container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
markers: not daily and (remote or not remote) and not gpu and doctest
pytest_command: coverage run -m pytest tests/test_docs.py
composer_package_name: mosaicml
- name: daily-cpu-3.11-2.3
container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04
markers: daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: daily-cpu-3.11-2.4
container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
markers: daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: daily-cpu-3.11-2.5
container: mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
markers: daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: mosaicml
- name: daily-cpu-3.11-2.5-composer
container: mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
markers: daily and (remote or not remote) and not gpu and not doctest
pytest_command: coverage run -m pytest
composer_package_name: composer
- name: daily-cpu-doctest
container: mosaicml/pytorch:2.5.0_cpu-python3.11-ubuntu20.04
markers: daily and (remote or not remote) and not gpu and doctest
pytest_command: coverage run -m pytest tests/test_docs.py
composer_package_name: mosaicml
steps:
- name: Run PR CPU Tests
uses: mosaicml/ci-testing/.github/actions/[email protected]
with:
name: ${{ matrix.name }}
pip_deps: "[all]"
pytest_command: ${{ matrix.pytest_command }}
pytest_markers: ${{ matrix.markers }}
safe_directory: composer
composer_package_name: ${{ matrix.composer_package_name }}
container: ${{ inputs.container }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
code_eval_device: ${{ secrets.CODE_EVAL_DEVICE }}
code_eval_url: ${{ secrets.CODE_EVAL_URL }}
code_eval_apikey: ${{ secrets.CODE_EVAL_APIKEY }}
gcs_key: ${{ secrets.GCS_KEY }}
gcs_secret: ${{ secrets.GCS_SECRET }}
azure_account_name: ${{ secrets.AZURE_ACCOUNT_NAME }}
azure_account_access_key: ${{ secrets.AZURE_ACCOUNT_ACCESS_KEY }}
coverage:
uses: ./.github/workflows/coverage.yaml
name: Coverage Results
if: github.repository_owner == 'mosaicml'
needs: [daily-pytest-cpu]
with:
download-path: artifacts
daily-pytest-gpu:
name: ${{ matrix.name }}
runs-on: ubuntu-latest
if: github.repository_owner == 'mosaicml'
strategy:
matrix:
include:
# Unlike CPU tests, we run daily tests together with GPU tests to minimize launch time
# on MCLOUD and not eat up all GPUs at once
- name: "gpu-3.11-2.3-1-gpu"
container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 1
- name: "gpu-3.11-2.4-1-gpu"
container: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 1
- name: "gpu-3.11-2.5-1-gpu"
container: mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 1
- name: "gpu-3.11-2.3-2-gpu"
container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 2
- name: "gpu-3.11-2.4-2-gpu"
container: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 2
- name: "gpu-3.11-2.5-2-gpu"
container: mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 2
- name: "gpu-3.11-2.3-4-gpu"
container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 4
- name: "gpu-3.11-2.4-4-gpu"
container: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 4
- name: "gpu-3.11-2.5-4-gpu"
container: mosaicml/pytorch:2.5.0_cu124-python3.11-ubuntu20.04
markers: "(daily or not daily) and (remote or not remote) and gpu and (doctest or not doctest)"
pytest_command: "coverage run -m pytest"
composer_package_name: "mosaicml"
gpu_num: 4
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Run PR GPU Tests
uses: mosaicml/ci-testing/.github/actions/[email protected]
with:
name: ${{ matrix.name }}
composer_package_name: ${{ matrix.composer_package_name }}
container: ${{ matrix.container }}
git_repo: mosaicml/composer
mcloud_timeout: 5400
pip_deps: "[all]"
pytest_command: ${{ matrix.pytest_command }}
pytest_markers: ${{ matrix.markers }}
python_version: 3.11
gpu_num: ${{ matrix.gpu_num }}
mcloud_api_key: ${{ secrets.MCLOUD_DAILY_API_KEY }}
gha_timeout: 5400
ci_repo_gpu_test_ref: v0.1.2