diff --git a/tests/unit/ops/transformer/inference/test_attention.py b/tests/unit/ops/transformer/inference/test_attention.py
index 0521245bcdf3..ecf681542ff6 100644
--- a/tests/unit/ops/transformer/inference/test_attention.py
+++ b/tests/unit/ops/transformer/inference/test_attention.py
@@ -27,8 +27,8 @@ def ref_torch_attention(q, k, v, mask, sm_scale):
 @pytest.mark.parametrize("causal", [True, False])
 @pytest.mark.parametrize("use_flash", [True, False])
 def test_attention(BATCH, H, N_CTX, D_HEAD, causal, use_flash, dtype=torch.float16):
-    if not deepspeed.HAS_TRITON:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
 
     minus_inf = -65504.0
     dev = deepspeed.accelerator.get_accelerator().device_name()
diff --git a/tests/unit/ops/transformer/inference/test_gelu.py b/tests/unit/ops/transformer/inference/test_gelu.py
index beb74d09ab30..5f820ef3b579 100644
--- a/tests/unit/ops/transformer/inference/test_gelu.py
+++ b/tests/unit/ops/transformer/inference/test_gelu.py
@@ -61,8 +61,8 @@ def test_gelu(batch, sequence, channels, dtype, use_triton_ops):
     activations_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=device)
     activations_ref = activations_ds.clone().detach()
 
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
     ds_out = run_gelu_ds(activations_ds, use_triton_ops)
     ref_out = run_gelu_reference(activations_ref)
     assert (allclose(ds_out, ref_out))
diff --git a/tests/unit/ops/transformer/inference/test_layer_norm.py b/tests/unit/ops/transformer/inference/test_layer_norm.py
index 2912807e9f43..7711daf0d887 100644
--- a/tests/unit/ops/transformer/inference/test_layer_norm.py
+++ b/tests/unit/ops/transformer/inference/test_layer_norm.py
@@ -45,8 +45,8 @@ def ds_triton_implementation(vals, gamma, beta, epsilon):
 @pytest.mark.parametrize("dtype", get_dtypes())
 @pytest.mark.parametrize("use_triton_ops", [False, True])
 def test_layer_norm(batch, seq_len, channels, dtype, use_triton_ops):
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
 
     vals = torch.randn((batch, seq_len, channels), dtype=dtype, device=get_accelerator().current_device_name())
     gamma = torch.randn((channels), dtype=dtype, device=get_accelerator().current_device_name())
@@ -93,8 +93,8 @@ def residual_ds_triton_implementation(vals, bias, res, gamma, beta, epsilon):
 @pytest.mark.parametrize("dtype", get_dtypes())
 @pytest.mark.parametrize("use_triton_ops", [False, True])
 def test_layer_norm_residual(batch, seq_len, channels, dtype, use_triton_ops):
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
 
     vals = torch.randn((batch, seq_len, channels), dtype=dtype, device=get_accelerator().current_device_name())
     residual = torch.randn((batch, seq_len, channels), dtype=dtype, device=get_accelerator().current_device_name())
@@ -163,8 +163,8 @@ def test_layer_norm_residual_store_pre_ln_res(batch, seq_len, channels, dtype):
 @pytest.mark.parametrize("residual", [True, False])
 @pytest.mark.parametrize("input_bias", [True, False])
 def test_triton_layer_norm(M, N, dtype, residual, input_bias, eps=1e-5, device='cuda'):
-    if not deepspeed.HAS_TRITON:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
     dev = get_accelerator().device_name()
     torch.manual_seed(0)
     # create data
diff --git a/tests/unit/ops/transformer/inference/test_matmul.py b/tests/unit/ops/transformer/inference/test_matmul.py
index 804a85750a3a..559aa2c60afe 100644
--- a/tests/unit/ops/transformer/inference/test_matmul.py
+++ b/tests/unit/ops/transformer/inference/test_matmul.py
@@ -42,8 +42,8 @@ def run_matmul_ds(a, b, use_triton_ops=False):
 @pytest.mark.parametrize("dtype", [torch.float16])
 @pytest.mark.parametrize("use_triton_ops", [True])
 def test_matmul_4d(B, H, M, K, N, dtype, use_triton_ops):
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
 
     # skip autotune in testing
     from deepspeed.ops.transformer.inference.triton.matmul_ext import fp16_matmul
diff --git a/tests/unit/ops/transformer/inference/test_residual_add.py b/tests/unit/ops/transformer/inference/test_residual_add.py
index 807da4904341..cab64d1d0555 100644
--- a/tests/unit/ops/transformer/inference/test_residual_add.py
+++ b/tests/unit/ops/transformer/inference/test_residual_add.py
@@ -74,8 +74,8 @@ def run_residual_add_reference(hidden_state, residual, attn_output, attn_bias, f
 @pytest.mark.parametrize("use_triton_ops", [True, False])
 def test_residual_add(batch, sequence, hidden_dim, dtype, mlp_after_attn, add_bias, mp_size, pre_attn_norm,
                       use_triton_ops):
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
     ds_out = torch.randn((batch, sequence, hidden_dim), dtype=dtype, device=get_accelerator().device_name())
     residual = torch.randn((batch, sequence, hidden_dim), dtype=dtype, device=get_accelerator().device_name())
     attn_output = torch.randn((batch, sequence, hidden_dim), dtype=dtype, device=get_accelerator().device_name())
diff --git a/tests/unit/ops/transformer/inference/test_softmax.py b/tests/unit/ops/transformer/inference/test_softmax.py
index 4f6d69160aa7..e582be1b926a 100644
--- a/tests/unit/ops/transformer/inference/test_softmax.py
+++ b/tests/unit/ops/transformer/inference/test_softmax.py
@@ -40,8 +40,8 @@ def run_softmax_ds(input, use_triton_ops=False):
 @pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
 @pytest.mark.parametrize("use_triton_ops", [True])
 def test_softmax(batch, sequence, channels, dtype, use_triton_ops):
-    if not deepspeed.HAS_TRITON and use_triton_ops:
-        pytest.skip("triton has to be installed for the test")
+    if not deepspeed.get_accelerator().is_triton_supported():
+        pytest.skip("triton is not supported on this system")
 
     device = deepspeed.accelerator.get_accelerator().device_name()
     input_ds = torch.randn((batch, sequence, channels), dtype=dtype, device=device)
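The guard introduced above is the same in every file: tests ask the active accelerator whether it supports triton, via `deepspeed.get_accelerator().is_triton_supported()`, instead of checking the `deepspeed.HAS_TRITON` import flag. A minimal sketch of the pattern for a new triton-backed test is below; the test name, shapes, and body are illustrative assumptions, not part of this patch:

```python
import pytest
import torch

import deepspeed
from deepspeed.accelerator import get_accelerator


@pytest.mark.parametrize("dtype", [torch.float16])
def test_my_triton_op(dtype):  # hypothetical test, for illustration only
    # Gate on the accelerator's capability rather than on whether the
    # triton package happened to import successfully.
    if not deepspeed.get_accelerator().is_triton_supported():
        pytest.skip("triton is not supported on this system")

    # Allocate inputs on whatever device the accelerator abstraction reports.
    x = torch.randn((2, 8, 16), dtype=dtype, device=get_accelerator().device_name())
    # ... run the triton op under test against a reference implementation ...
```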