# Reduce the device bubble introduced by heavy loop synchronization in coalesced fetch/release (z3_leaf_module) #1193
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI workflow that runs selected DeepSpeed unit tests on a self-hosted
# Intel XPU runner, inside an Intel oneAPI base-kit container.
name: xpu-max1100

# Triggers: manual dispatch, a daily cron run, and pull requests that touch
# this workflow file or one of the listed accelerator/runtime/op-builder paths.
on:
  workflow_dispatch:
  schedule:
    # Once a day at minute 0, hour 0 (GitHub evaluates cron in UTC).
    - cron: "0 0 * * *"
  pull_request:
    paths:
      - ".github/workflows/xpu-max1100.yml"
      - "accelerator/xpu_accelerator.py"
      - "accelerator/abstract_accelerator.py"
      - "accelerator/cpu_accelerator.py"
      - "accelerator/real_accelerator.py"
      - "csrc/xpu/**"
      - "deepspeed/runtime/engine.py"
      - "deepspeed/runtime/bf16_optimizer.py"
      - "deepspeed/runtime/zero/stage_1_and_2.py"
      - "deepspeed/runtime/zero/stage3.py"
      - "deepspeed/runtime/zero/partition_parameters.py"
      - "deepspeed/runtime/zero/partitioned_param_coordinator.py"
      - "deepspeed/runtime/zero/parameter_offload.py"
      - "deepspeed/runtime/pipe/engine.py"
      - "deepspeed/runtime/utils.py"
      - "op_builder/xpu/**"

# One active run per workflow + ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read
  issues: write

jobs:
  unit-tests:
    runs-on: [self-hosted, intel, xpu]
    container:
      image: intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04
      ports:
        - 80
      # Privileged container with the host's /dev/dri nodes passed through
      # (presumably so the GPU is visible inside the container -- confirm
      # with the runner owners before tightening these flags).
      options: --privileged -it --rm --device /dev/dri:/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --ipc=host --cap-add=ALL

    steps:
      - uses: actions/checkout@v4

      # System packages, the pinned torch / IPEX / oneCCL / torchvision
      # wheels from the Intel extension index, then this repository with its
      # dev and autotuning extras.
      - name: Install prerequisite
        run: |
          apt-get update
          apt-get install clinfo libaio-dev python3-pip -y
          pip install torch==2.3.1 -f https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/torch/
          pip install intel-extension-for-pytorch==2.3.110+xpu -f https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/intel-extension-for-pytorch/
          pip install oneccl_bind_pt==2.3.100+xpu -f https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/oneccl-bind-pt/
          pip install torchvision==0.18.1 -f https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/torchvision/
          pip install py-cpuinfo numpy
          pip install .[dev,autotuning]

      # Diagnostic output only: tool versions, ds_report, whether torch sees
      # an XPU, which accelerator DeepSpeed selected, and installed packages.
      - name: Check container state
        run: |
          ldd --version
          ds_report
          python3 -c "import torch; print('torch:', torch.__version__, torch)"
          python3 -c "import torch; import intel_extension_for_pytorch; print('XPU available:', torch.xpu.is_available())"
          python3 -c "from deepspeed.accelerator import get_accelerator; print('accelerator:', get_accelerator()._name)"
          pip list

      # Each pytest invocation targets an explicit file or directory under
      # tests/unit; only the suites listed here are run by this workflow.
      - name: Unit tests
        run: |
          cd tests/unit
          pytest --verbose accelerator/*
          pytest --verbose autotuning/*
          pytest --verbose checkpoint/test_reshape_checkpoint.py
          pytest --verbose checkpoint/test_moe_checkpoint.py
          pytest --verbose checkpoint/test_shared_weights.py
          pytest --verbose launcher/test_ds_arguments.py launcher/test_run.py
          pytest --verbose model_parallelism/*
          pytest --verbose moe/test_moe_tp.py
          pytest --verbose monitor/*
          pytest --verbose utils/*
          pytest --verbose runtime/test_ds_config_model.py
          pytest --verbose runtime/pipe/test_pipe_schedule.py
          pytest --verbose runtime/zero/test_zero_config.py
          pytest --verbose runtime/zero/test_zero_tiled.py
          pytest --verbose runtime/zero/test_zeropp.py
          pytest --verbose runtime/test_autocast.py
          pytest --verbose runtime/test_data.py
          pytest --verbose runtime/test_runtime_utils.py
          pytest --verbose runtime/activation_checkpointing/*
          pytest --verbose runtime/utils/*
          pytest --verbose runtime/zero/test_zero_dynamic_class.py