From afa672e267ae4ed0435384ee3abc1cd30b9467c8 Mon Sep 17 00:00:00 2001 From: Kristijan Mitrovic Date: Wed, 25 Dec 2024 13:29:44 +0100 Subject: [PATCH] Provided one global DeviceConnector instance. Moved typedefs from utils to a new file. (#131) Also moved `Workload` to a new file. --- tests/conftest.py | 6 ++---- tests/infra/base_tester.py | 2 +- tests/infra/comparison.py | 2 +- tests/infra/device_connector.py | 14 +++++------- tests/infra/device_runner.py | 15 ++++++------- tests/infra/graph_tester.py | 3 ++- tests/infra/model_tester.py | 3 ++- tests/infra/op_tester.py | 4 +++- tests/infra/types.py | 23 ++++++++++++++++++++ tests/infra/utils.py | 38 ++++----------------------------- tests/infra/workload.py | 26 ++++++++++++++++++++++ 11 files changed, 75 insertions(+), 61 deletions(-) create mode 100644 tests/infra/types.py create mode 100644 tests/infra/workload.py diff --git a/tests/conftest.py b/tests/conftest.py index 06c5e39..f8f878d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,7 +7,7 @@ import jax import jax._src.xla_bridge as xb import pytest -from infra.device_connector import DeviceConnector +from infra.device_connector import device_connector def initialize(): @@ -25,7 +25,5 @@ def initialize(): def setup_session(): # Added to prevent `PJRT_Api already exists for device type tt` error. # Will be removed completely soon. - connector = DeviceConnector.get_instance() - - if not connector.is_initialized(): + if not device_connector.is_initialized(): initialize() diff --git a/tests/infra/base_tester.py b/tests/infra/base_tester.py index cd4f39e..97024c7 100644 --- a/tests/infra/base_tester.py +++ b/tests/infra/base_tester.py @@ -17,7 +17,7 @@ compare_pcc, ) from .device_runner import DeviceRunner -from .utils import Tensor +from .types import Tensor class BaseTester(ABC): diff --git a/tests/infra/comparison.py b/tests/infra/comparison.py index cb3c88f..ef85a00 100644 --- a/tests/infra/comparison.py +++ b/tests/infra/comparison.py @@ -10,7 +10,7 @@ import jax.numpy as jnp from .device_runner import run_on_cpu -from .utils import Tensor +from .types import Tensor @dataclass diff --git a/tests/infra/device_connector.py b/tests/infra/device_connector.py index 8dd581b..3575094 100644 --- a/tests/infra/device_connector.py +++ b/tests/infra/device_connector.py @@ -62,15 +62,6 @@ def __init__(self) -> None: self._plugin_path = plugin_path self._initialize_backend() - @staticmethod - def get_instance() -> DeviceConnector: - """ - Factory method returning singleton connector instance. - - Use this method instead of constructor. - """ - return DeviceConnector() - def is_initialized(self) -> bool: """Checks if connector is already initialized.""" if hasattr(self, "_initialized") and self._initialized == True: @@ -119,3 +110,8 @@ def _initialize_backend(self) -> None: jax.config.update("jax_platforms", self._supported_devices_str()) self._initialized = True + + +# `DeviceConnector._initialize_backend` must be executed before anything jax related is +# called. By providing this global instance, that is secured. +device_connector = DeviceConnector() diff --git a/tests/infra/device_runner.py b/tests/infra/device_runner.py index 031bdd3..fc1c917 100644 --- a/tests/infra/device_runner.py +++ b/tests/infra/device_runner.py @@ -6,8 +6,9 @@ import jax -from .device_connector import DeviceConnector, DeviceType -from .utils import Tensor, Workload +from .device_connector import DeviceType, device_connector +from .types import Tensor +from .workload import Workload class DeviceRunner: @@ -64,9 +65,7 @@ def put_tensors_on_gpu(*tensors: Tensor) -> Sequence[Tensor]: def _run_on_device(device_type: DeviceType, workload: Workload) -> Tensor: """Runs `workload` on device identified by `device_type`.""" device_workload = DeviceRunner._put_on_device(device_type, workload) - - connector = DeviceConnector().get_instance() - device = connector.connect_device(device_type) + device = device_connector.connect_device(device_type) with jax.default_device(device): return device_workload.execute() @@ -74,8 +73,7 @@ def _run_on_device(device_type: DeviceType, workload: Workload) -> Tensor: @staticmethod def _put_on_device(device_type: DeviceType, workload: Workload) -> Workload: """Puts `workload` on device and returns it.""" - connector = DeviceConnector().get_instance() - device = connector.connect_device(device_type) + device = device_connector.connect_device(device_type) return DeviceRunner._safely_put_workload_on_device(workload, device) @staticmethod @@ -83,8 +81,7 @@ def _put_tensors_on_device( device_type: DeviceType, tensors: Sequence[Tensor] ) -> Sequence[Tensor]: """Puts `tensors` on device identified by `device_type`.""" - connector = DeviceConnector().get_instance() - device = connector.connect_device(device_type) + device = device_connector.connect_device(device_type) return [jax.device_put(t, device) for t in tensors] @staticmethod diff --git a/tests/infra/graph_tester.py b/tests/infra/graph_tester.py index 4f63700..eed74b4 100644 --- a/tests/infra/graph_tester.py +++ b/tests/infra/graph_tester.py @@ -8,7 +8,8 @@ from .comparison import ComparisonConfig from .op_tester import OpTester -from .utils import Tensor, Workload +from .types import Tensor +from .workload import Workload class GraphTester(OpTester): diff --git a/tests/infra/model_tester.py b/tests/infra/model_tester.py index 7faf601..61933a1 100644 --- a/tests/infra/model_tester.py +++ b/tests/infra/model_tester.py @@ -14,7 +14,8 @@ from .base_tester import BaseTester from .comparison import ComparisonConfig from .device_runner import DeviceRunner -from .utils import Model, Workload +from .types import Model +from .workload import Workload class RunMode(Enum): diff --git a/tests/infra/op_tester.py b/tests/infra/op_tester.py index 7122d17..b86cf49 100644 --- a/tests/infra/op_tester.py +++ b/tests/infra/op_tester.py @@ -9,7 +9,9 @@ from .base_tester import BaseTester from .comparison import ComparisonConfig from .device_runner import DeviceRunner -from .utils import Tensor, Workload, random_tensor +from .types import Tensor +from .utils import random_tensor +from .workload import Workload class OpTester(BaseTester): diff --git a/tests/infra/types.py b/tests/infra/types.py new file mode 100644 index 0000000..5d35bdf --- /dev/null +++ b/tests/infra/types.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + +from enum import Enum +from typing import Union + +import jax +from flax import linen, nnx + +# Convenience alias. Could be used to represent jax.Array, torch.Tensor, np.ndarray, etc. +Tensor = Union[jax.Array] + +# Convenience alias. Could be used to represent nnx.Module, torch.nn.Module, etc. +# NOTE nnx.Module is the newest API, linen.Module is legacy but it is used in some +# huggingface models. +Model = Union[nnx.Module, linen.Module] + + +class Framework(Enum): + JAX = "jax" + TORCH = "torch" + NUMPY = "numpy" diff --git a/tests/infra/utils.py b/tests/infra/utils.py index 0cfb44e..8fd4b99 100644 --- a/tests/infra/utils.py +++ b/tests/infra/utils.py @@ -2,44 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 -from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Mapping, Optional, Sequence, Union - import jax import jax.numpy as jnp -from flax import linen, nnx from jax import export - -@dataclass -class Workload: - executable: Callable - args: Sequence[Any] - kwargs: Optional[Mapping[str, Any]] = None - - def __post_init__(self): - # If kwargs is None, initialize it to an empty dictionary. - if self.kwargs is None: - self.kwargs = {} - - def execute(self) -> Any: - return self.executable(*self.args, **self.kwargs) - - -class Framework(Enum): - JAX = "jax" - TORCH = "torch" - NUMPY = "numpy" - - -# Convenience alias. Could be used to represent jax.Array, torch.Tensor, np.ndarray, etc. -Tensor = Union[jax.Array] - -# Convenience alias. Could be used to represent nnx.Module, torch.nn.Module, etc. -# NOTE nnx.Module is the newest API, linen.Module is legacy but it is used in some -# huggingface models. -Model = Union[nnx.Module, linen.Module] +from .device_runner import run_on_cpu +from .types import Framework, Tensor +from .workload import Workload def _str_to_dtype(dtype_str: str, framework: Framework = Framework.JAX): @@ -50,6 +19,7 @@ def _str_to_dtype(dtype_str: str, framework: Framework = Framework.JAX): raise ValueError(f"Unsupported framework: {framework.value}.") +@run_on_cpu def random_tensor( shape: tuple, dtype: str = "float32", diff --git a/tests/infra/workload.py b/tests/infra/workload.py new file mode 100644 index 0000000..7464f94 --- /dev/null +++ b/tests/infra/workload.py @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass +from typing import Any, Callable, Mapping, Optional, Sequence + + +@dataclass +class Workload: + """ + Convenience dataclass storing a callable and its positional and keyword arguments. + """ + + executable: Callable + args: Sequence[Any] + kwargs: Optional[Mapping[str, Any]] = None + + def __post_init__(self): + # If kwargs is None, initialize it to an empty dictionary. + if self.kwargs is None: + self.kwargs = {} + + def execute(self) -> Any: + """Calls callable passing stored args and kwargs directly.""" + return self.executable(*self.args, **self.kwargs)