Skip to content

Commit

Permalink
Internal
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 700479115
Change-Id: Idebaf6aa42a4124479b790270ceedfb98a23b51f
GitOrigin-RevId: 267500752a3d92aa07d731051c0e48c72127b5f9
  • Loading branch information
DeepMind Team authored and alpiccioni committed Dec 4, 2024
1 parent cd23fcb commit baa40ff
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 1 deletion.
1 change: 1 addition & 0 deletions xmanager/xm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from xmanager.xm.packagables import dockerfile_container
from xmanager.xm.packagables import python_container
from xmanager.xm.resources import AcceleratorType
from xmanager.xm.resources import Architecture
from xmanager.xm.resources import GpuType
from xmanager.xm.resources import InvalidTpuTopologyError
from xmanager.xm.resources import JobRequirements
Expand Down
16 changes: 16 additions & 0 deletions xmanager/xm/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ def __getitem__(cls, resource_name: str) -> 'ResourceType':
raise KeyError(f'Unknown {cls.__name__} {resource_name!r}') # pylint: disable=raise-missing-from


@enum.unique
class Architecture(enum.Enum):
  """CPU architecture types for resources.

  A member of this enum is accepted by the optional `architecture` keyword
  argument of `JobRequirements` to state which CPU architecture the job
  should run on.

  Members carry explicit integer values. `enum.unique` makes the class
  definition fail if two members are ever given the same value, so a new
  architecture cannot silently become an alias of an existing one.
  """

  # NOTE(review): presumably the Intel Haswell (x86-64) family -- confirm.
  HASWELL = 1
  # NOTE(review): presumably any ARM-based CPU -- confirm.
  ARM = 2


class ResourceType(enum.Enum, metaclass=_CaseInsensitiveResourceTypeMeta):
"""Type of a countable resource (e.g., CPU, memory, accelerators etc).
Expand Down Expand Up @@ -363,6 +370,7 @@ class JobRequirements:
accelerator: The accelerator the jobs uses, if there is one. Jobs using
multiple accelerators are not supported because different kinds of
accelerators are usually not installed on the same host.
architecture: The architecture of the CPU the job should run on.
topology: Accelerator topology, if an accelerator is used.
location: Place where the job should run. For example a cluster name or a
Borg cell.
Expand All @@ -373,6 +381,7 @@ class JobRequirements:

task_requirements: ResourceDict
accelerator: Optional[ResourceType]
architecture: Optional[Architecture]
topology: Optional[Topology]

location: Optional[str]
Expand All @@ -384,6 +393,7 @@ def __init__(
Union[ResourceType, str], ResourceQuantity
] = immutabledict.immutabledict(),
*,
architecture: Optional[Architecture] = None,
location: Optional[str] = None,
replicas: Optional[int] = None,
service_tier: Optional[ServiceTier] = None,
Expand All @@ -395,6 +405,8 @@ def __init__(
Args:
resources: resource amounts as a dictionary, for example
{xm.ResourceType.V100: 2}.
architecture: The architecture of the CPU the job should run on. If
not specified, the default architecture for the resource will be used.
location: Place where the job should run. For example a cluster name or a
Borg cell.
replicas: Number of identical tasks to run within a job. 1 by default.
Expand All @@ -417,6 +429,8 @@ def __init__(
self.task_requirements = ResourceDict()
self.accelerator = None
self.topology = None
# TODO: Validate `architecture` here so that invalid values fail fast.
self.architecture = architecture

for resource_name, value in itertools.chain(
resources.items(), kw_resources.items()
Expand Down Expand Up @@ -479,6 +493,8 @@ def __repr__(self) -> str:

if self.location:
args.append(f'location={self.location!r}')
if self.architecture:
args.append(f'architecture={self.architecture}')
if self.service_tier != ServiceTier.PROD:
args.append(f'service_tier=xm.{self.service_tier}')
if self.replicas != 1:
Expand Down
1 change: 0 additions & 1 deletion xmanager/xm/resources_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

from absl.testing import absltest
from absl.testing import parameterized

from xmanager import xm
from xmanager.xm import resources
from xmanager.xm.resources import JobRequirements
Expand Down

0 comments on commit baa40ff

Please sign in to comment.