Skip to content

Commit

Permalink
Begin implementation of ROCm driver installation
Browse files Browse the repository at this point in the history
  • Loading branch information
jedel1043 committed Sep 9, 2024
1 parent 232d4c0 commit 47b974b
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 1 deletion.
14 changes: 14 additions & 0 deletions charms/slurmd/charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ config:
$ juju config slurmd nhc-conf="$(cat extra-nhc.conf)"
```
gpu:
default: ""
type: string
description: >
Type of GPU driver to install.
Available options: [`amd`].
Example usage:
```bash
$ juju config slurmd gpu=amd
```
actions:
node-configured:
description: Remove a node from DownNodes when the reason is `New node`.
Expand Down
55 changes: 55 additions & 0 deletions charms/slurmd/src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,58 @@
=cs1s
-----END PGP PUBLIC KEY BLOCK-----
"""

ROCM_PPA_KEY = """
----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v1
mQINBFefsSABEADmVqQyRi5bcUs/eG8mnKLdY+V+xuKuHLuujlXinSaMFRO640Md
C2HNYLSd58Z8cB1rKfiN639CZp+SkDWq60cFXDCcX9djT0JmBzsTD/gwoMr16tMY
O+Z2mje2pEYgDJdmYrephhXn29BfebW1IQKdA+4C7l675mJ/T8yVMUNXC0hqfGDA
h1MJUQy/lz1S2fGdjCKX0PiYOnCOyhNa7aTpw9PkZWgEa/s4BhplFZxvLohrCcf6
ks0gUITHfeEhJvj2KurRfL68DgFifGnG+/fsMHgW1Xp19GsnIVaoh6cV7/iFHhrb
6YHI1fdOq/mwOfG8mJnXmDXC/o24Q7mRRwvoJcsT0j+thRirs8trV01mKY+7Hxd2
CamWttibo062pjWN2aEUMPmEU2kmGOupsZtlpqn6SGCd2+6maOPMNEq/F0EWxhul
q6mgezVb8pvJ3bwvph2/lMSgfT9fHs6UIh4i/3rnA5/JaejFonlnS9xEuglKjklj
UoikSPBOwjvoPW2u99WCflURFSXVvuk7Ci+XkbVPIZyD6gFJjeY02Ic5MAv5tj/z
0fpgr/CfwEllms+z7qz768xRweA0kmPTTARdufVTna6EV3K3njxvCIIfnrp1cF6S
e3VrREd98gO0Rmzy74UFqkXl9Tb/+UILx1qVRmOBinwacKGqzo+k9jPUKQARAQAB
tChBTUQgTUxTRSBEZXZPcHMgPGRsLk1MU0UuRGV2T3BzQGFtZC5jb20+iQI+BBMB
AgAoAhsDBgsJCAcDAgYVCAIJCgsEFgIDAQIeAQIXgAUCYfuRkwUJE8Hh5wAKCRCT
hrSKGmk8XI1AEACSJLVGHCLJOOKz9fbUR4KWl7Gpv0RWccwxhH01jNZTSXUCEnKA
2KYmaqFvrT5szxWILobmCNYtAlbdkpUfb0mMaF3UtTu+1UMOw2ExzxHw1FyA+z6d
vLqDKXLldsOFUfojDUhD5cK6uvONPc1orCf/4ve6wnRG838bAzb4VrFR64IxfPjx
NukH+jo2nEXNpnNv44DEiq65CcObaPuwAVBFnRYD/ByPO4ZArxFXqNzHRxpoZkKv
iwzhbPG4cirioqzRR9y2SsC+a2sO4a/jH0wOL2+n4L86xShYcuCBxXvS/AwrV/aO
JxKOfAUV4VQegAOQz64L+iz7PslNSTILJGdvGcC5Ckgpo6evdWBT7KdGXhzf4S1f
wZjYyP9sfQa7LxqyrkLHZqYt4If4Jmukx7cApBYp1nPnuCQrLU6D4Arq0ZVWQuNV
hbABLeqwdVQcX+vG/Kr/ZC+Vkv3Z8oElwVGAAQ6HNXr/u8ud2bu6iNJ5mcQbM1HD
KTNt5LUrk0p588a8dk0/TyC5xeKSv51iNL+aOVaTr0pRwgaHtEVar2i0FPC1mkr4
1hhIDddx8WLoUt/52f1juyr/4CpL1M5f1cbMVjV6i0kqIEx/hxrryc+fZZQT5R4M
vysxcsh8ttgpABG5vzz2rLOCanmQ4eDdmlugzn/u0ngoDdnC0gEfnVVutLkCDQRX
n7EgARAAlsWVKSOQicuBxBlo3U5tre5whSyAOWHuy6/heGwCkGssTahbIL8pRwOL
5nKJCPCKKJ4YYoZ+Jzer9WTsDRZU/zpQXK9C5WdfF6DN/Fai3lqhgeDDVyF0hUDr
NQigm/w66JEYTGtMcC5PnYv7S6Zrn9WN4anv9n5thNwfsqxpbbg6sAQ2aLHLsW96
myQE9v1s0YoSZYc7rFYBwszE+tFX0kLlyBYSRVns/USQifu66RObO706d8DHp6Ro
vO6WgsTu+0RR2FEUabBx1q6iKe1cqK0FYtWd8tXCpqQBm0zGC6UwTp4Z4GMCX2Pk
3xAMmrItW5kPKCANB+P/8ZoOoZLIX5Fr9axQ496lUh0ZDhOACewJfj9Szk9GN5rq
+2QKnRepatevGBVaN0lCAEwg2q9/9xmrT6CixFrbnw2T6mWHM3jQrvduqmC0c1Cd
uMZBGDKSpjouaN0UKtC+udwWiY7w452pcjCnUjzjk7tR1IarSCnLLYeb+MDCK83M
CFH60SmBfdqjRiTiLas34KSKNnmbfUfrTYswf0Oed/qXAUSlYOCmWl4sV8n+Ebpy
XfY80/fzu95RbpMEZMhUTRtvr64O5jaWM/lFnubnegGTW3Bk/fBR2VRsBx56ZHlc
JH23f6IREjQ1x4B2UsINYfyYpmzb+R4qpMzycBVHv9ipiYQsQ8sAEQEAAYkCJQQY
AQoADwIbDAUCYfuRtwUJE8HiEAAKCRCThrSKGmk8XMAcEACd0jYXjnu7qoEY4U9Q
47X2SeJmWsuTavCrU5AWxjYwWd0mtDqK8EynxDPq7UFs+8+OukqrE++p0bfBbDl9
TwnwmSSdizAZriHMSgeg9GR5KVL4mreNhFQdk/6mTFdlRhi5s7ZuvPayLSMIAWaj
ET5gFMeO1B/ABSpaKEZwQjRcXrto/hCUJ++7qoosblhcgwX7fiqZZbMxcoCEQIQQ
7ZasLxpVtaeDVfetp2zO5F0/e3D/sNbvBrlDofSt6D5V2cmKjLqONFVc6JrzSNeK
k9Gn8UVzAKfRfLaQyDaoFV0MbBf3q111UQQPkvwZYp0lPT6t2/G8zoubwFhHsM31
K5ZBbt0384hI9RJITo9/krXVXLYFeCLcoPKn/fGWgAwyYAYr6C7JcocxTNUyCd1I
AVg4SO/JuC3NWFQK5LhknN/gJkFlLZdB2cWqu9dDIkx1cHXThaM2n/7GSxv7fzrI
Br1jhZjUPWJ2iOd8iHgVEkIEvZql8z+huSxcNemodEN1emmUUoIyY3Fh0lJmozDt
ZPATk3iPpksOApsDVhWXP96RjTYEozYCxgTxCnk+kX/iJIlt53BPNWm9HMTcmtDI
v3s7OEcw0DN3U2VKcL9Q4Sg3uNfhwQsw/xBJaxAHQn5lN/8t0eLt+U653ooEEr0o
ta5TfPumStSQ1UjP8pPny4l+JQ==
=UOE+
-----END PGP PUBLIC KEY BLOCK-----
"""
43 changes: 42 additions & 1 deletion charms/slurmd/src/slurmd_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import Any, Dict

import distro
from constants import MUNGE_KEY_PATH, SLURM_GROUP, SLURM_USER, UBUNTU_HPC_PPA_KEY
from constants import MUNGE_KEY_PATH, SLURM_GROUP, SLURM_USER, UBUNTU_HPC_PPA_KEY, ROCM_PPA_KEY

import charms.operator_libs_linux.v0.apt as apt # type: ignore [import-untyped]
import charms.operator_libs_linux.v1.systemd as systemd # type: ignore [import-untyped]
Expand Down Expand Up @@ -129,6 +129,47 @@ def install(self) -> bool:

return package_installed

class ROCmPackagesLifecycleManager:
"""Facilitate ROCm packages lifecycles."""

_KEYRING_PATH = Path(f"/usr/share/keyrings/rocm.gpg")

def install(self) -> bool:
"""Install ROCm packages using apt."""

if self._KEYRING_PATH.exists():
self._KEYRING_PATH.unlink()
self._KEYRING_PATH.write_text(ROCM_PPA_KEY)

amdgpu_repo = apt.DebianRepository(
enabled=True,
repotype="deb",
uri="https://repo.radeon.com/amdgpu/6.2/ubuntu",
release=distro.codename(),
groups=["main"],
gpg_key_filename=self._KEYRING_PATH,
options={
# TODO: See if there's a way to get the charm architecture from ops.
"arch": "amd64"
}
)

rocm_repo = apt.DebianRepository(
enabled=True,
repotype="deb",
uri="https://repo.radeon.com/rocm/apt/6.2",
release=distro.codename(),
groups=["main"],
gpg_key_filename=self._KEYRING_PATH,
options={
"arch": "amd64"
}
)

repositories = apt.RepositoryMapping()
repositories.add(amdgpu_repo)
repositories.add(rocm_repo)


class SlurmdManager:
"""SlurmdManager."""
Expand Down

0 comments on commit 47b974b

Please sign in to comment.