Skip to content

Commit

Permalink
Merge branch 'master' into hongwei/fix_domino_allreduce
Browse files Browse the repository at this point in the history
  • Loading branch information
tjruwase authored Dec 18, 2024
2 parents 2833db7 + 4cd1d97 commit cc31ef5
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 19 deletions.
1 change: 1 addition & 0 deletions .github/workflows/no-torch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
workflow_dispatch:
pull_request:
paths:
- 'accelerator/**'
- '.github/workflows/no-torch.yml'
- 'op_builder/**'
schedule:
Expand Down
8 changes: 7 additions & 1 deletion accelerator/cpu_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@

# DeepSpeed Team

import torch
from .abstract_accelerator import DeepSpeedAccelerator

# During setup stage torch may not be installed, pass on no torch will
# allow op builder related API to be executed.
try:
import torch
except ImportError as e:
pass

try:
import oneccl_bindings_for_pytorch # noqa: F401 # type: ignore
oneccl_imported_p = True
Expand Down
2 changes: 2 additions & 0 deletions deepspeed/launcher/multinode_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ def get_cmd(self, environment, active_resources):
deepspeed_launch.append("--no_local_rank")
if self.args.save_pid:
deepspeed_launch += ["--save_pid", f"{os.getpid()}"]
if self.args.enable_each_rank_log:
deepspeed_launch.append(f"--enable_each_rank_log={self.args.enable_each_rank_log}")
if self.args.elastic_training:
deepspeed_launch.append("--enable_elastic_training")
deepspeed_launch.append(f"--max_elastic_nodes={self.args.max_elastic_nodes}")
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_geglu.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

torch_minor_version = None


def run_bias_geglu_reference(activations, bias):
# Expected behavior is that of casting to float32 internally
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_gelu.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

torch_minor_version = None


def run_bias_gelu_reference(activations, bias):
# Expected behavior is that of casting to float32 internally and using the tanh approximation
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_bias_relu.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

torch_minor_version = None


def run_bias_relu_reference(activations, bias):
# Expected behavior is that of casting to float32 internally
Expand Down
14 changes: 5 additions & 9 deletions tests/unit/ops/transformer/inference/test_gelu.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
from deepspeed.ops.op_builder import InferenceBuilder
from deepspeed.ops.transformer import DeepSpeedInferenceConfig
from deepspeed.ops.transformer.inference.op_binding.bias_gelu import BiasGeluOp
from deepspeed.utils.torch import required_torch_version

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

torch_minor_version = None


def allclose(x, y):
assert x.dtype == y.dtype
Expand All @@ -23,14 +22,11 @@ def allclose(x, y):


def version_appropriate_gelu(activations):
global torch_minor_version
if torch_minor_version is None:
torch_minor_version = int(torch.__version__.split('.')[1])
# If torch version = 1.12
if torch_minor_version < 12:
return torch.nn.functional.gelu(activations)
else:
# gelu behavior changes (correctly) in torch 1.12
if required_torch_version(min_version=1.12):
return torch.nn.functional.gelu(activations, approximate='tanh')
else:
return torch.nn.functional.gelu(activations)


def run_gelu_reference(activations):
Expand Down
1 change: 0 additions & 1 deletion tests/unit/ops/transformer/inference/test_matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

inference_module = None
torch_minor_version = None


def allclose(x, y):
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/ops/transformer/inference/test_softmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("Inference ops are not available on this system", allow_module_level=True)

torch_minor_version = None


def allclose(x, y):
assert x.dtype == y.dtype
Expand Down

0 comments on commit cc31ef5

Please sign in to comment.