Commit 61ea87f

Merge branch 'master' into ds_overlap_fix

loadams authored Dec 18, 2024
2 parents: 47f573b + f9e158a

Showing 13 changed files with 46 additions and 42 deletions.
1 change: 1 addition & 0 deletions .github/workflows/no-torch.yml

@@ -4,6 +4,7 @@ on:
   workflow_dispatch:
   pull_request:
     paths:
+      - 'accelerator/**'
       - '.github/workflows/no-torch.yml'
       - 'op_builder/**'
   schedule:
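(The no-torch workflow checks that DeepSpeed's packaging and op-builder machinery work in an environment without torch installed; adding accelerator/** to its path filter means it now also runs for PRs touching the accelerator code — presumably to cover the cpu_accelerator.py import guard introduced below.)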
45 changes: 24 additions & 21 deletions CODEOWNERS

@@ -8,49 +8,52 @@

 # top-level repo folders
 /.github/ @loadams
-/azure/ @awan-10
-/benchmarks/ @awan-10 @tjruwase
+/azure/ @loadams
+/benchmarks/ @guanhuawang @tjruwase
 /bin/ @loadams
-/csrc/ @awan-10
+/csrc/ @tjruwase
 /deepspeed/ @loadams @tjruwase
-/docker/ @awan-10
+/docker/ @loadams @guanhuawang
 /docs/ @loadams @tjruwase
-/examples/ @awan-10 @tohtana
+/examples/ @jomayeri @tohtana
 /op_builder/ @loadams @tjruwase @jomayeri
-/release/ @loadams
+/release/ @loadams @jomayeri
 /requirements/ @loadams
-/scripts/ @awan-10
+/scripts/ @loadams @tjruwase
 /tests/ @tjruwase @loadams @tohtana

 # deepspeed
 /deepspeed/autotuning/ @loadams
 /deepspeed/checkpoint/ @tjruwase
-/deepspeed/comm/ @awan-10
+/deepspeed/comm/ @guanhuawang
 /deepspeed/compression/ @tjruwase
-/deepspeed/elasticity/ @awan-10
+/deepspeed/elasticity/ @tjruwase
 /deepspeed/launcher/ @loadams
-/deepspeed/module_inject/ @awan-10
+/deepspeed/module_inject/ @hwchen2017 @loadams
 /deepspeed/moe/ @tohtana
-/deepspeed/monitor/ @awan-10
+/deepspeed/monitor/ @tjruwase
 /deepspeed/nebula/ @tjruwase
+/deepspeed/nvme/ @tjruwase @jomayeri
 /deepspeed/ops/ @tohtana
 /deepspeed/pipe/ @tohtana @loadams
 /deepspeed/profiling/ @loadams
-/deepspeed/utils/ @tjruwase @awan-10
+/deepspeed/sequence/ @tohtana
+/deepspeed/utils/ @tjruwase @tohtana

 # inference
-/deepspeed/inference/ @awan-10
-/deepspeed/model_implementations/ @awan-10
+/deepspeed/inference/ @hwchen2017 @tohtana
+/deepspeed/model_implementations/ @tohtana @loadams

 # training
 /deepspeed/runtime/ @tjruwase @tohtana
 /deepspeed/runtime/activation_checkpointing/ @tjruwase
 /deepspeed/runtime/checkpoint_engine/ @tjruwase
-/deepspeed/runtime/comm/ @awan-10
-/deepspeed/runtime/compression/ @awan-10
+/deepspeed/runtime/comm/ @guanhuawang
+/deepspeed/runtime/compression/ @tjruwase
 /deepspeed/runtime/data_pipeline/ @tjruwase
-/deepspeed/runtime/fp16/ @tjruwase
-/deepspeed/runtime/fp16/onebit/ @awan-10
-/deepspeed/runtime/pipe/ @loadams
-/deepspeed/runtime/swap_tensor/ @tjruwase
-/deepspeed/runtime/zero/ @tjruwase
+/deepspeed/runtime/domino/ @guanhuawang @hwchen2017
+/deepspeed/runtime/fp16/ @tjruwase @tohtana
+/deepspeed/runtime/fp16/onebit/ @tjruwase
+/deepspeed/runtime/pipe/ @loadams @tohtana
+/deepspeed/runtime/swap_tensor/ @tjruwase @jomayeri
+/deepspeed/runtime/zero/ @tjruwase @tohtana
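For context: each CODEOWNERS line maps a path pattern to the GitHub handles automatically requested for review when a matching file changes, and when several patterns match a file the last matching line wins. The net effect of this hunk is to reassign the paths previously owned by @awan-10 and to add owners for newer components (nvme, sequence, domino).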
8 changes: 7 additions & 1 deletion accelerator/cpu_accelerator.py

@@ -3,9 +3,15 @@

 # DeepSpeed Team

-import torch
 from .abstract_accelerator import DeepSpeedAccelerator

+# During setup stage torch may not be installed, pass on no torch will
+# allow op builder related API to be executed.
+try:
+    import torch
+except ImportError as e:
+    pass
+
 try:
     import oneccl_bindings_for_pytorch  # noqa: F401 # type: ignore
     oneccl_imported_p = True
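The guarded import is the standard optional-dependency pattern: the module stays importable while pip builds the package in a torch-free environment, and torch-dependent behavior degrades gracefully at call time. A minimal self-contained sketch of the same idea (the device_count fallback below is illustrative, not DeepSpeed's actual logic):

# Optional-dependency import: keep the module importable even when
# torch is absent (e.g., during pip's setup/build stage).
try:
    import torch
    HAS_TORCH = True
except ImportError:
    torch = None
    HAS_TORCH = False


def device_count() -> int:
    # Hypothetical query that degrades gracefully without torch,
    # instead of crashing the whole import.
    if not HAS_TORCH:
        return 0
    return 1  # CPU accelerator: a single logical device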
2 changes: 2 additions & 0 deletions deepspeed/launcher/multinode_runner.py

@@ -104,6 +104,8 @@ def get_cmd(self, environment, active_resources):
             deepspeed_launch.append("--no_local_rank")
         if self.args.save_pid:
             deepspeed_launch += ["--save_pid", f"{os.getpid()}"]
+        if self.args.enable_each_rank_log:
+            deepspeed_launch.append(f"--enable_each_rank_log={self.args.enable_each_rank_log}")
         if self.args.elastic_training:
             deepspeed_launch.append("--enable_elastic_training")
             deepspeed_launch.append(f"--max_elastic_nodes={self.args.max_elastic_nodes}")
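This restores per-rank log redirection on multi-node jobs: the runner now forwards its --enable_each_rank_log argument into the inner launch command it builds for remote hosts, where previously the option was silently dropped. The forwarding pattern, reduced to a toy standalone function (the scaffolding is hypothetical; only the flag names come from the diff):

import argparse
import os


def build_launch_cmd(args: argparse.Namespace) -> list:
    # Rebuild the inner launcher command, forwarding only the options
    # the user actually set -- the same shape as get_cmd() above.
    cmd = ["python", "-u", "-m", "deepspeed.launcher.launch"]
    if args.save_pid:
        cmd += ["--save_pid", f"{os.getpid()}"]
    if args.enable_each_rank_log:
        # The hunk above adds exactly this forwarding step.
        cmd.append(f"--enable_each_rank_log={args.enable_each_rank_log}")
    return cmd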
6 changes: 5 additions & 1 deletion deepspeed/module_inject/auto_tp.py

@@ -346,11 +346,15 @@ def _replace(self, child, name, conv_linear_layer):
             weight, bias = shard_value_with_share_qk(child.weight.data, child.bias, dist.get_rank(),
                                                      dist.get_world_size(), False)
             return LinearAllreduce(weight, bias, self.mp_group)
+        # For Arctic model, bypass to all_reduce replacement for w2 weights
+        arctic_w2_all_reduce_linear = False
+        if 'Arctic' in str(self.module) and 'w2' in name:
+            arctic_w2_all_reduce_linear = True
         # For MLP including chunk layer.
         if 'gate_up_proj' in name or ('dense_h_to_4h' in name and 'GLM' in str(self.module)):
             weight, bias = shard_chunk_mlp(child.weight.data, child.bias, dist.get_rank(), dist.get_world_size())
             return LinearLayer(weight=weight, bias=bias)
-        if name in self.all_reduce_linears:
+        if name in self.all_reduce_linears or arctic_w2_all_reduce_linear:
             # if conv_linear_layer [weight_shape[1], weight_shape[0] // mp_size]
             # else [weight_shape[0], weight_shape[1] // mp_size]
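Arctic's MoE w2 projections are down-projections whose output shards must be summed across ranks, so they need the LinearAllreduce replacement even though they are not listed in all_reduce_linears. Pulled out of _replace as a standalone predicate, the new routing decision is roughly (a sketch, not the actual DeepSpeed API):

def needs_all_reduce(module_repr: str, layer_name: str, all_reduce_linears) -> bool:
    # Row-parallel layers produce partial sums per rank, so their
    # outputs must be all-reduced rather than concatenated.
    arctic_w2 = 'Arctic' in module_repr and 'w2' in layer_name
    return layer_name in all_reduce_linears or arctic_w2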
1 change: 1 addition & 0 deletions docs/_tutorials/automatic-tensor-parallelism.md

@@ -121,6 +121,7 @@ The following results were collected using V100 SXM2 32GB GPUs.
 The following model families have been successfully tested with automatic tensor parallelism. Other models may work but have not been tested yet.

 - albert
+- arctic
 - baichuan
 - bert
 - bigbird_pegasus
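This documents what the auto_tp.py change above enables. For a supported family, automatic tensor parallelism is typically engaged through init_inference with kernel injection disabled — a minimal sketch (model loading elided; world_size stands for the number of GPUs):

import torch
import deepspeed

# model = AutoModelForCausalLM.from_pretrained("...")  # any tested family
engine = deepspeed.init_inference(
    model,
    tensor_parallel={"tp_size": world_size},  # shard weights across GPUs
    dtype=torch.float16,
    replace_with_kernel_inject=False,  # take the AutoTP path
)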
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_geglu.py

@@ -15,8 +15,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_geglu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_gelu.py

@@ -16,8 +16,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_gelu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally and using the tanh approximation
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_relu.py

@@ -15,8 +15,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def run_bias_relu_reference(activations, bias):
     # Expected behavior is that of casting to float32 internally
14 changes: 5 additions & 9 deletions tests/unit/ops/transformer/inference/test_gelu.py

@@ -9,12 +9,11 @@
 from deepspeed.ops.op_builder import InferenceBuilder
 from deepspeed.ops.transformer import DeepSpeedInferenceConfig
 from deepspeed.ops.transformer.inference.op_binding.bias_gelu import BiasGeluOp
+from deepspeed.utils.torch import required_torch_version

 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def allclose(x, y):
     assert x.dtype == y.dtype
@@ -23,14 +22,11 @@ def allclose(x, y):


 def version_appropriate_gelu(activations):
-    global torch_minor_version
-    if torch_minor_version is None:
-        torch_minor_version = int(torch.__version__.split('.')[1])
-    # If torch version = 1.12
-    if torch_minor_version < 12:
-        return torch.nn.functional.gelu(activations)
-    else:
+    # gelu behavior changes (correctly) in torch 1.12
+    if required_torch_version(min_version=1.12):
         return torch.nn.functional.gelu(activations, approximate='tanh')
+    else:
+        return torch.nn.functional.gelu(activations)


 def run_gelu_reference(activations):
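This refactor is also why the torch_minor_version globals deleted in the neighboring test files were dead code. Beyond being unused there, the hand-rolled parse was fragile: int(torch.__version__.split('.')[1]) inspects only the minor number, so torch 2.0 (minor 0) would be misclassified as older than 1.12, whereas required_torch_version compares the full (major, minor) pair. The resulting gate, as a self-contained sketch:

import torch
from deepspeed.utils.torch import required_torch_version


def version_appropriate_gelu(activations: torch.Tensor) -> torch.Tensor:
    # torch gained the approximate='tanh' option for gelu in 1.12.
    if required_torch_version(min_version=1.12):
        return torch.nn.functional.gelu(activations, approximate='tanh')
    return torch.nn.functional.gelu(activations)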
1 change: 0 additions & 1 deletion tests/unit/ops/transformer/inference/test_matmul.py

@@ -12,7 +12,6 @@
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

 inference_module = None
-torch_minor_version = None


 def allclose(x, y):
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_softmax.py

@@ -11,8 +11,6 @@
 if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
     pytest.skip("Inference ops are not available on this system", allow_module_level=True)

-torch_minor_version = None
-

 def allclose(x, y):
     assert x.dtype == y.dtype
2 changes: 1 addition & 1 deletion version.txt

@@ -1 +1 @@
-0.16.2
+0.16.3
