From 47b974b6496a90687357f59104e7027fb6271160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Mon, 9 Sep 2024 17:30:43 -0600 Subject: [PATCH] Begin implementation of ROCm driver installation --- charms/slurmd/charmcraft.yaml | 14 +++++++++ charms/slurmd/src/constants.py | 55 +++++++++++++++++++++++++++++++++ charms/slurmd/src/slurmd_ops.py | 43 +++++++++++++++++++++++++- 3 files changed, 111 insertions(+), 1 deletion(-) diff --git a/charms/slurmd/charmcraft.yaml b/charms/slurmd/charmcraft.yaml index a3f5a00..8de70b8 100644 --- a/charms/slurmd/charmcraft.yaml +++ b/charms/slurmd/charmcraft.yaml @@ -78,6 +78,20 @@ config: $ juju config slurmd nhc-conf="$(cat extra-nhc.conf)" ``` + gpu: + default: "" + type: string + description: > + Type of GPU driver to install. + + Available options: [`amd`]. + + Example usage: + ```bash + $ juju config slurmd gpu=amd + ``` + + actions: node-configured: description: Remove a node from DownNodes when the reason is `New node`. 
diff --git a/charms/slurmd/src/constants.py b/charms/slurmd/src/constants.py
index 21b428f..f28b3eb 100644
--- a/charms/slurmd/src/constants.py
+++ b/charms/slurmd/src/constants.py
@@ -40,3 +40,58 @@
 =cs1s
 -----END PGP PUBLIC KEY BLOCK-----
 """
+
+ROCM_PPA_KEY = """
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1
+
+mQINBFefsSABEADmVqQyRi5bcUs/eG8mnKLdY+V+xuKuHLuujlXinSaMFRO640Md
+C2HNYLSd58Z8cB1rKfiN639CZp+SkDWq60cFXDCcX9djT0JmBzsTD/gwoMr16tMY
+O+Z2mje2pEYgDJdmYrephhXn29BfebW1IQKdA+4C7l675mJ/T8yVMUNXC0hqfGDA
+h1MJUQy/lz1S2fGdjCKX0PiYOnCOyhNa7aTpw9PkZWgEa/s4BhplFZxvLohrCcf6
+ks0gUITHfeEhJvj2KurRfL68DgFifGnG+/fsMHgW1Xp19GsnIVaoh6cV7/iFHhrb
+6YHI1fdOq/mwOfG8mJnXmDXC/o24Q7mRRwvoJcsT0j+thRirs8trV01mKY+7Hxd2
+CamWttibo062pjWN2aEUMPmEU2kmGOupsZtlpqn6SGCd2+6maOPMNEq/F0EWxhul
+q6mgezVb8pvJ3bwvph2/lMSgfT9fHs6UIh4i/3rnA5/JaejFonlnS9xEuglKjklj
+UoikSPBOwjvoPW2u99WCflURFSXVvuk7Ci+XkbVPIZyD6gFJjeY02Ic5MAv5tj/z
+0fpgr/CfwEllms+z7qz768xRweA0kmPTTARdufVTna6EV3K3njxvCIIfnrp1cF6S
+e3VrREd98gO0Rmzy74UFqkXl9Tb/+UILx1qVRmOBinwacKGqzo+k9jPUKQARAQAB
+tChBTUQgTUxTRSBEZXZPcHMgPGRsLk1MU0UuRGV2T3BzQGFtZC5jb20+iQI+BBMB
+AgAoAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAUCYfuRkwUJE8Hh5wAKCRCT
+hrSKGmk8XI1AEACSJLVGHCLJOOKz9fbUR4KWl7Gpv0RWccwxhH01jNZTSXUCEnKA
+2KYmaqFvrT5szxWILobmCNYtAlbdkpUfb0mMaF3UtTu+1UMOw2ExzxHw1FyA+z6d
+vLqDKXLldsOFUfojDUhD5cK6uvONPc1orCf/4ve6wnRG838bAzb4VrFR64IxfPjx
+NukH+jo2nEXNpnNv44DEiq65CcObaPuwAVBFnRYD/ByPO4ZArxFXqNzHRxpoZkKv
+iwzhbPG4cirioqzRR9y2SsC+a2sO4a/jH0wOL2+n4L86xShYcuCBxXvS/AwrV/aO
+JxKOfAUV4VQegAOQz64L+iz7PslNSTILJGdvGcC5Ckgpo6evdWBT7KdGXhzf4S1f
+wZjYyP9sfQa7LxqyrkLHZqYt4If4Jmukx7cApBYp1nPnuCQrLU6D4Arq0ZVWQuNV
+hbABLeqwdVQcX+vG/Kr/ZC+Vkv3Z8oElwVGAAQ6HNXr/u8ud2bu6iNJ5mcQbM1HD
+KTNt5LUrk0p588a8dk0/TyC5xeKSv51iNL+aOVaTr0pRwgaHtEVar2i0FPC1mkr4
+1hhIDddx8WLoUt/52f1juyr/4CpL1M5f1cbMVjV6i0kqIEx/hxrryc+fZZQT5R4M
+vysxcsh8ttgpABG5vzz2rLOCanmQ4eDdmlugzn/u0ngoDdnC0gEfnVVutLkCDQRX
+n7EgARAAlsWVKSOQicuBxBlo3U5tre5whSyAOWHuy6/heGwCkGssTahbIL8pRwOL
+5nKJCPCKKJ4YYoZ+Jzer9WTsDRZU/zpQXK9C5WdfF6DN/Fai3lqhgeDDVyF0hUDr
+NQigm/w66JEYTGtMcC5PnYv7S6Zrn9WN4anv9n5thNwfsqxpbbg6sAQ2aLHLsW96
+myQE9v1s0YoSZYc7rFYBwszE+tFX0kLlyBYSRVns/USQifu66RObO706d8DHp6Ro
+vO6WgsTu+0RR2FEUabBx1q6iKe1cqK0FYtWd8tXCpqQBm0zGC6UwTp4Z4GMCX2Pk
+3xAMmrItW5kPKCANB+P/8ZoOoZLIX5Fr9axQ496lUh0ZDhOACewJfj9Szk9GN5rq
++2QKnRepatevGBVaN0lCAEwg2q9/9xmrT6CixFrbnw2T6mWHM3jQrvduqmC0c1Cd
+uMZBGDKSpjouaN0UKtC+udwWiY7w452pcjCnUjzjk7tR1IarSCnLLYeb+MDCK83M
+CFH60SmBfdqjRiTiLas34KSKNnmbfUfrTYswf0Oed/qXAUSlYOCmWl4sV8n+Ebpy
+XfY80/fzu95RbpMEZMhUTRtvr64O5jaWM/lFnubnegGTW3Bk/fBR2VRsBx56ZHlc
+JH23f6IREjQ1x4B2UsINYfyYpmzb+R4qpMzycBVHv9ipiYQsQ8sAEQEAAYkCJQQY
+AQoADwIbDAUCYfuRtwUJE8HiEAAKCRCThrSKGmk8XMAcEACd0jYXjnu7qoEY4U9Q
+47X2SeJmWsuTavCrU5AWxjYwWd0mtDqK8EynxDPq7UFs+8+OukqrE++p0bfBbDl9
+TwnwmSSdizAZriHMSgeg9GR5KVL4mreNhFQdk/6mTFdlRhi5s7ZuvPayLSMIAWaj
+ET5gFMeO1B/ABSpaKEZwQjRcXrto/hCUJ++7qoosblhcgwX7fiqZZbMxcoCEQIQQ
+7ZasLxpVtaeDVfetp2zO5F0/e3D/sNbvBrlDofSt6D5V2cmKjLqONFVc6JrzSNeK
+k9Gn8UVzAKfRfLaQyDaoFV0MbBf3q111UQQPkvwZYp0lPT6t2/G8zoubwFhHsM31
+K5ZBbt0384hI9RJITo9/krXVXLYFeCLcoPKn/fGWgAwyYAYr6C7JcocxTNUyCd1I
+AVg4SO/JuC3NWFQK5LhknN/gJkFlLZdB2cWqu9dDIkx1cHXThaM2n/7GSxv7fzrI
+Br1jhZjUPWJ2iOd8iHgVEkIEvZql8z+huSxcNemodEN1emmUUoIyY3Fh0lJmozDt
+ZPATk3iPpksOApsDVhWXP96RjTYEozYCxgTxCnk+kX/iJIlt53BPNWm9HMTcmtDI
+v3s7OEcw0DN3U2VKcL9Q4Sg3uNfhwQsw/xBJaxAHQn5lN/8t0eLt+U653ooEEr0o
+ta5TfPumStSQ1UjP8pPny4l+JQ==
+=UOE+
+-----END PGP PUBLIC KEY BLOCK-----
+"""
diff --git a/charms/slurmd/src/slurmd_ops.py b/charms/slurmd/src/slurmd_ops.py
index 3f1afc5..ab3eb80 100644
--- a/charms/slurmd/src/slurmd_ops.py
+++ b/charms/slurmd/src/slurmd_ops.py
@@ -15,7 +15,7 @@
 from typing import Any, Dict
 
 import distro
-from constants import MUNGE_KEY_PATH, SLURM_GROUP, SLURM_USER, UBUNTU_HPC_PPA_KEY
+from constants import MUNGE_KEY_PATH, ROCM_PPA_KEY, SLURM_GROUP, SLURM_USER, UBUNTU_HPC_PPA_KEY
 
 import charms.operator_libs_linux.v0.apt as apt  # type: ignore [import-untyped]
 import
charms.operator_libs_linux.v1.systemd as systemd  # type: ignore [import-untyped]
@@ -129,6 +129,50 @@ def install(self) -> bool:
         return package_installed
 
 
+class ROCmPackagesLifecycleManager:
+    """Facilitate ROCm packages lifecycles.
+
+    So far this only prepares apt: `install` writes the repository signing key
+    and registers the amdgpu and ROCm repositories. No package is installed yet.
+    """
+
+    # apt reads `.gpg` keyrings as binary and `.asc` keyrings as ASCII-armored text.
+    # ROCM_PPA_KEY is an armored block, so the file needs the `.asc` extension.
+    _KEYRING_PATH = Path("/usr/share/keyrings/rocm.asc")
+    _REPOSITORY_URIS = (
+        "https://repo.radeon.com/amdgpu/6.2/ubuntu",
+        "https://repo.radeon.com/rocm/apt/6.2",
+    )
+
+    def install(self) -> bool:
+        """Write the signing key and register the ROCm apt repositories.
+
+        Returns:
+            True once the key is on disk and both repositories are registered.
+        """
+        # Recreate the keyring from scratch on every run.
+        self._KEYRING_PATH.unlink(missing_ok=True)
+        self._KEYRING_PATH.write_text(ROCM_PPA_KEY)
+
+        repositories = apt.RepositoryMapping()
+        for uri in self._REPOSITORY_URIS:
+            repositories.add(
+                apt.DebianRepository(
+                    enabled=True,
+                    repotype="deb",
+                    uri=uri,
+                    release=distro.codename(),
+                    groups=["main"],
+                    gpg_key_filename=str(self._KEYRING_PATH),
+                    # TODO: See if there's a way to get the charm architecture from ops.
+                    options={"arch": "amd64"},
+                )
+            )
+
+        # TODO: refresh the apt cache and install the ROCm packages.
+        return True
+
+
 class SlurmdManager:
     """SlurmdManager."""
 