
Commit

Merge branch 'master' into gma/fix_cpu_inference
delock authored Oct 8, 2023
2 parents cc0294f + 7ed952e commit af6661a
Showing 79 changed files with 2,261 additions and 141 deletions.
1 change: 1 addition & 0 deletions .github/workflows/cpu-inference.yml
@@ -5,6 +5,7 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+  workflow_dispatch:
   merge_group:
     branches: [ master ]

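The added workflow_dispatch: trigger allows this CI workflow to be started manually in addition to its existing triggers. As a hedged illustration (this uses GitHub's standard REST dispatch endpoint; the token and ref below are placeholders), a manual run could be kicked off like this:

    # Sketch: manually dispatch the workflow via GitHub's REST API.
    # POST /repos/{owner}/{repo}/actions/workflows/{workflow_file}/dispatches
    import requests

    resp = requests.post(
        "https://api.github.com/repos/microsoft/DeepSpeed/actions/workflows/cpu-inference.yml/dispatches",
        headers={
            "Authorization": "Bearer <YOUR_GITHUB_TOKEN>",  # placeholder
            "Accept": "application/vnd.github+json",
        },
        json={"ref": "master"},  # branch to run against
    )
    resp.raise_for_status()  # GitHub returns 204 No Content on success
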
11 changes: 6 additions & 5 deletions .github/workflows/formatting.yml
@@ -27,12 +27,13 @@ jobs:
           which python
           python --version
-      - name: Install deepspeed
+      - name: Install dependencies
         run: |
-          pip install .[dev,autotuning,triton]
-          ds_report
+          # Previously we would do pip install .[dev] but this is causing out of
+          # space errors start with torch 2.1.0 release
+          grep -E "clang-format|pre-commit" requirements/requirements-dev.txt | xargs pip install
       - name: Formatting checks
         run: |
-          pip show pre-commit clang-format
-          pre-commit run --all-files
+          pip show pre-commit clang-format
+          pre-commit run --all-files
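
The rewritten install step avoids a full pip install .[dev] (which was hitting disk-space limits with the torch 2.1.0 release) and instead installs only the two pins the formatting job needs. A minimal Python sketch of what the grep | xargs pipeline selects (the printed pins are illustrative, not the actual file contents):

    # Sketch of: grep -E "clang-format|pre-commit" requirements/requirements-dev.txt | xargs pip install
    import re

    with open("requirements/requirements-dev.txt") as f:
        picked = [line.strip() for line in f if re.search(r"clang-format|pre-commit", line)]
    print(picked)  # e.g. ['clang-format==16.0.2', 'pre-commit>=2.20.0'] (illustrative values)
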
2 changes: 1 addition & 1 deletion .github/workflows/nv-ds-chat.yml
@@ -27,7 +27,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip3 install -U --cache-dir $TORCH_CACHE torch
+          pip3 install -U --cache-dir $TORCH_CACHE torch --index-url https://download.pytorch.org/whl/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
2 changes: 1 addition & 1 deletion .github/workflows/nv-lightning-v100.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip install -U --cache-dir $TORCH_CACHE torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
2 changes: 1 addition & 1 deletion .github/workflows/nv-mii.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip3 install -U --cache-dir $TORCH_CACHE torch
+          pip3 install -U --cache-dir $TORCH_CACHE torch --index-url https://download.pytorch.org/whl/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
6 changes: 3 additions & 3 deletions .github/workflows/nv-torch-latest-v100.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip install -U --cache-dir $TORCH_CACHE torch torchvision --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
@@ -52,8 +52,8 @@ jobs:
         run: |
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
           cd tests
-          coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.0" --cuda_ver="11.7"
-          coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.0" --cuda_ver="11.7"
+          coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.1" --cuda_ver="11.8"
+          coverage run --concurrency=multiprocessing -m pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.1" --cuda_ver="11.8"
       - name: Coverage report
         run: |
2 changes: 1 addition & 1 deletion .github/workflows/nv-torch-nightly-v100.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: Install pytorch
         run: |
-          pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
+          pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
2 changes: 1 addition & 1 deletion .github/workflows/nv-transformers-v100.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Install pytorch
         run: |
           # use the same pytorch version as transformers CI
-          pip install -U --cache-dir $TORCH_CACHE torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
+          pip install -U --cache-dir $TORCH_CACHE torch==2.0.1+cu118 --index-url https://download.pytorch.org/whl/cu118
           python -c "import torch; print('torch:', torch.__version__, torch)"
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
47 changes: 47 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,47 @@
+name: Build and publish DeepSpeed release
+
+on:
+  push:
+    tags:
+      - 'v*.*.*'
+
+jobs:
+  deploy:
+    runs-on: ubuntu-20.04
+    environment: release-env
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: "master"
+      - name: Get release version from tag
+        run: |
+          echo "RELEASE_VERSION=${GITHUB_REF#refs/*/v}" >> $GITHUB_ENV
+      - name: Check release version
+        run: |
+          python release/check_release_version.py --release_version ${{ env.RELEASE_VERSION }}
+      - name: Build DeepSpeed
+        run: |
+          DS_BUILD_STRING=" " python setup.py sdist_wheel
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          repository-url: https://upload.pypi.org/legacy/
+      - name: Bump version
+        run: |
+          python release/bump_patch_version.py --current_version ${{ env.RELEASE_VERSION }}
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v4
+        with:
+          token: ${{ secrets.GH_PAT }}
+          add-paths: |
+            version.txt
+          body: |
+            **Auto-generated PR to update version.txt after a DeepSpeed release**
+            Released version - ${{ env.RELEASE_VERSION }}
+            Author - @${{ github.actor }}
+          branch: AutoPR/${{ env.RELEASE_VERSION }}
+          assignees: ${{ github.actor }}
+          title: "Update version.txt after ${{ env.RELEASE_VERSION }} release"
+          author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
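
The ${GITHUB_REF#refs/*/v} expansion above strips the shortest leading match of refs/*/v from the pushed tag ref, leaving the bare version number for the later steps. An equivalent sketch in Python (the tag ref shown is hypothetical):

    # Mimic bash's ${GITHUB_REF#refs/*/v}: drop everything up to and including the first "/v".
    github_ref = "refs/tags/v0.11.0"  # hypothetical tag ref
    release_version = github_ref.split("/v", 1)[1] if "/v" in github_ref else github_ref
    print(release_version)  # -> 0.11.0
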
3 changes: 3 additions & 0 deletions README.md
@@ -15,6 +15,7 @@
 ## Latest News
 <b> <span style="color:orange" > DeepSpeed empowers ChatGPT-like model training with a single click, offering 15x speedup over SOTA RLHF systems with unprecedented cost reduction at all scales; [learn how](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat)</span>.</b>
 
+* [2023/10] [DeepSpeed-VisualChat: Improve Your Chat Experience with Multi-Round Multi-Image Inputs](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-visualchat/10-03-2023/README.md) [[English](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-visualchat/10-03-2023/README.md)] [[中文](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-visualchat/10-03-2023/README-Chinese.md)] [[日本語](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-visualchat/10-03-2023/README-Japanese.md)]
 * [2023/09] Announcing the DeepSpeed4Science Initiative: Enabling large-scale scientific discovery through sophisticated AI system technologies [[DeepSpeed4Science website](https://deepspeed4science.ai/)] [[Tutorials](https://www.deepspeed.ai/deepspeed4science/)] [[Blog](https://www.microsoft.com/en-us/research/blog/announcing-the-deepspeed4science-initiative-enabling-large-scale-scientific-discovery-through-sophisticated-ai-system-technologies/)] [[中文](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed4science/chinese/README.md)] [[日本語](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed4science/japanese/README.md)]
 * [2023/08] [DeepSpeed ZeRO-Inference: 20X faster inference through weight quantization and KV cache offloading](https://github.com/microsoft/DeepSpeedExamples/blob/master/inference/huggingface/zero_inference/README.md)
 * [2023/08] [DeepSpeed-Chat: Llama/Llama-2 system support, efficiency boost, and training stability improvements](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat/ds-chat-release-8-31/README.md)
@@ -234,6 +235,8 @@ Conduct](https://opensource.microsoft.com/codeofconduct/). For more information
 24. Pareesa Ameneh Golnari, Zhewei Yao, Yuxiong He. (2023) Selective Guidance: Are All the Denoising Steps of Guided Diffusion Important? [arXiv:2305.09847](https://arxiv.org/abs/2305.09847)
 25. Zhewei Yao, Reza Yazdani Aminabadi, Olatunji Ruwase, Samyam Rajbhandari, Xiaoxia Wu, Ammar Ahmad Awan, Jeff Rasley, Minjia Zhang, Conglong Li, Connor Holmes, Zhongzhu Zhou, Michael Wyatt, Molly Smith, Lev Kurilenko, Heyang Qin, Masahiro Tanaka, Shuai Che, Shuaiwen Leon Song, Yuxiong He. (2023) DeepSpeed-Chat: Easy, Fast and Affordable RLHF Training of ChatGPT-like Models at All Scales [arXiv:2308.01320](https://arxiv.org/abs/2308.01320).
 26. Xiaoxia Wu, Zhewei Yao, Yuxiong He. (2023) ZeroQuant-FP: A Leap Forward in LLMs Post-Training W4A8 Quantization Using Floating-Point Formats [arXiv:2307.09782](https://arxiv.org/abs/2307.09782)
+27. Zhewei Yao, Xiaoxia Wu, Conglong Li, Minjia Zhang, Heyang Qin, Olatunji Ruwase, Ammar Ahmad Awan, Samyam Rajbhandari, Yuxiong He. (2023) DeepSpeed-VisualChat: Multi-Round Multi-Image Interleave Chat via Multi-Modal Causal Attention [arXiv:2309.14327](https://arxiv.org/pdf/2309.14327.pdf)
+
 
 
 # Videos
6 changes: 5 additions & 1 deletion accelerator/abstract_accelerator.py
@@ -226,7 +226,11 @@ def LongTensor(self):
         ...
 
     @abc.abstractmethod
-    def pin_memory(self, tensor):
+    def pin_memory(self, tensor, align_bytes=1):
+        ...
+
+    @abc.abstractmethod
+    def is_pinned(self, tensor):
         ...
 
     @abc.abstractmethod
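
The abstract interface now accepts an optional align_bytes argument on pin_memory (defaulting to 1, so existing callers keep the old behavior) and obliges every backend to implement is_pinned. A minimal usage sketch through DeepSpeed's accelerator abstraction (assuming the resolved backend actually pins memory, e.g. CUDA; as the next hunks show, the CPU backend's pin_memory is a no-op):

    import torch
    from deepspeed.accelerator import get_accelerator

    acc = get_accelerator()
    buf = torch.empty(1 << 20)       # ordinary pageable host buffer
    pinned = acc.pin_memory(buf)     # align_bytes defaults to 1
    print(acc.is_pinned(pinned))     # True on pinning backends; may be False on the CPU backend
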
5 changes: 4 additions & 1 deletion accelerator/cpu_accelerator.py
@@ -225,9 +225,12 @@ def IntTensor(self):
     def LongTensor(self):
         return torch.LongTensor
 
-    def pin_memory(self, tensor):
+    def pin_memory(self, tensor, align_bytes=1):
         return tensor
 
+    def is_pinned(self, tensor):
+        return tensor.is_pinned()
+
     def op_builder_dir(self):
         try:
             # is op_builder from deepspeed or a 3p version? this should only succeed if it's deepspeed
5 changes: 4 additions & 1 deletion accelerator/cuda_accelerator.py
@@ -210,9 +210,12 @@ def IntTensor(self):
     def LongTensor(self):
         return torch.cuda.LongTensor
 
-    def pin_memory(self, tensor):
+    def pin_memory(self, tensor, align_bytes=1):
         return tensor.pin_memory()
 
+    def is_pinned(self, tensor):
+        return tensor.is_pinned()
+
     def on_accelerator(self, tensor):
         device_str = str(tensor.device)
         if device_str.startswith('cuda:'):
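
One detail worth noting about the CUDA implementation above: torch.Tensor.pin_memory() returns a pinned copy rather than pinning in place, which is why is_pinned should be checked on the returned tensor. A plain-PyTorch sketch (requires a CUDA build of torch):

    import torch

    t = torch.ones(8)
    p = t.pin_memory()    # returns a *copy* in page-locked (pinned) host memory
    print(p.is_pinned())  # True
    print(t.is_pinned())  # False; the original tensor is unchanged
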
5 changes: 4 additions & 1 deletion accelerator/mps_accelerator.py
@@ -192,9 +192,12 @@ def IntTensor(self):
     def LongTensor(self):
         return
 
-    def pin_memory(self, tensor):
+    def pin_memory(self, tensor, align_bytes=1):
         return tensor.pin_memory()
 
+    def is_pinned(self, tensor):
+        return tensor.is_pinned()
+
     def on_accelerator(self, tensor):
         device_str = str(tensor.device)
         if device_str.startswith("mps"):
5 changes: 4 additions & 1 deletion accelerator/npu_accelerator.py
@@ -191,9 +191,12 @@ def IntTensor(self):
     def LongTensor(self):
         return torch.npu.LongTensor
 
-    def pin_memory(self, tensor):
+    def pin_memory(self, tensor, align_bytes=1):
         return tensor.pin_memory()
 
+    def is_pinned(self, tensor):
+        return tensor.is_pinned()
+
     def on_accelerator(self, tensor):
         device_str = str(tensor.device)
         if device_str.startswith('npu:'):