diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml index 93612b7983..78faea8e44 100644 --- a/.github/workflows/pr-cpu.yaml +++ b/.github/workflows/pr-cpu.yaml @@ -23,6 +23,10 @@ jobs: container: mosaicml/pytorch:2.3.0_cpu-python3.11-ubuntu20.04 markers: "not gpu" pytest_command: "coverage run -m pytest" + - name: "cpu-2.3.1" + container: mosaicml/pytorch:2.3.1_cpu-python3.11-ubuntu20.04 + markers: "not gpu" + pytest_command: "coverage run -m pytest" name: ${{ matrix.name }} if: github.repository_owner == 'mosaicml' with: diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml index 31af66e51f..335d049306 100644 --- a/.github/workflows/pr-gpu.yaml +++ b/.github/workflows/pr-gpu.yaml @@ -24,6 +24,11 @@ jobs: markers: "gpu" pytest_command: "coverage run -m pytest" pip_deps: "[all]" + - name: "gpu-2.3.1" + container: mosaicml/llm-foundry:2.3.1_cu121_flash2-latest + markers: "gpu" + pytest_command: "coverage run -m pytest" + pip_deps: "[all]" name: ${{ matrix.name }} if: github.repository_owner == 'mosaicml' with: diff --git a/README.md b/README.md index 70436271dd..c92c252395 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,7 @@ python data_prep/convert_dataset_hf.py \ # Train an MPT-125m model for 10 batches composer train/train.py \ train/yamls/pretrain/mpt-125m.yaml \ - data_local=my-copy-c4 \ + variables.data_local=my-copy-c4 \ train_loader.dataset.split=train_small \ eval_loader.dataset.split=val_small \ max_duration=10ba \