From ea5c789841ea500c063211140d7baf2c68173fcc Mon Sep 17 00:00:00 2001 From: Deezzir Date: Fri, 29 Nov 2024 17:19:16 -0500 Subject: [PATCH 1/2] Add nouveau check and modprobe --- src/hw_tools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/hw_tools.py b/src/hw_tools.py index 3d8f73ab..a0bcb427 100644 --- a/src/hw_tools.py +++ b/src/hw_tools.py @@ -277,13 +277,19 @@ def install(self) -> None: logger.info("NVIDIA driver already installed in the machine") return + if 0 == subprocess.call("lsmod | grep nouveau", shell=True): + logger.error("Nouveau driver is loaded. Unload it before installing NVIDIA driver") + raise ResourceInstallationError(self._name) + logger.info("Installing NVIDIA driver") apt.add_package("ubuntu-drivers-common", update_cache=True) try: # This can be changed to check_call and not rely in the output if this is fixed # https://github.com/canonical/ubuntu-drivers-common/issues/106 - result = subprocess.check_output("ubuntu-drivers install --gpgpu".split(), text=True) + # https://bugs.launchpad.net/ubuntu/+source/ubuntu-drivers-common/+bug/2090502 + result = subprocess.check_output("ubuntu-drivers --gpgpu install".split(), text=True) + subprocess.call("modprobe nvidia".split()) except subprocess.CalledProcessError as err: logger.error("Failed to install the NVIDIA driver: %s", err) From e66d60e1843a88484a2c321f32690f054383e5f5 Mon Sep 17 00:00:00 2001 From: Deezzir Date: Mon, 2 Dec 2024 16:33:08 -0500 Subject: [PATCH 2/2] Add unit tests for the new logic --- src/hw_tools.py | 9 +++++---- tests/unit/test_hw_tools.py | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/hw_tools.py b/src/hw_tools.py index a0bcb427..f617fc88 100644 --- a/src/hw_tools.py +++ b/src/hw_tools.py @@ -277,9 +277,10 @@ def install(self) -> None: logger.info("NVIDIA driver already installed in the machine") return - if 0 == subprocess.call("lsmod | grep nouveau", shell=True): - logger.error("Nouveau driver is loaded. Unload it before installing NVIDIA driver") - raise ResourceInstallationError(self._name) + with open("/proc/modules", encoding="utf-8") as modules: + if "nouveau" in modules.read(): + logger.error("Nouveau driver is loaded. Unload it before installing NVIDIA driver") + raise ResourceInstallationError(self._name) logger.info("Installing NVIDIA driver") apt.add_package("ubuntu-drivers-common", update_cache=True) @@ -289,7 +290,7 @@ def install(self) -> None: # https://github.com/canonical/ubuntu-drivers-common/issues/106 # https://bugs.launchpad.net/ubuntu/+source/ubuntu-drivers-common/+bug/2090502 result = subprocess.check_output("ubuntu-drivers --gpgpu install".split(), text=True) - subprocess.call("modprobe nvidia".split()) + subprocess.check_call("modprobe nvidia".split()) except subprocess.CalledProcessError as err: logger.error("Failed to install the NVIDIA driver: %s", err) diff --git a/tests/unit/test_hw_tools.py b/tests/unit/test_hw_tools.py index 55d29dc3..481d63de 100644 --- a/tests/unit/test_hw_tools.py +++ b/tests/unit/test_hw_tools.py @@ -1219,7 +1219,7 @@ def test_dcgm_create_custom_metrics_copy_fail( def test_nvidia_driver_strategy_install_success( - mock_path, mock_check_output, mock_apt_lib, nvidia_driver_strategy + mock_path, mock_check_output, mock_apt_lib, mock_check_call, nvidia_driver_strategy ): nvidia_version = mock.MagicMock() nvidia_version.exists.return_value = False @@ -1228,7 +1228,8 @@ def test_nvidia_driver_strategy_install_success( nvidia_driver_strategy.install() mock_apt_lib.add_package.assert_called_once_with("ubuntu-drivers-common", update_cache=True) - mock_check_output.assert_called_once_with("ubuntu-drivers install --gpgpu".split(), text=True) + mock_check_output.assert_called_once_with("ubuntu-drivers --gpgpu install".split(), text=True) + mock_check_call.assert_called_once_with("modprobe nvidia".split()) def test_install_nvidia_drivers_already_installed( @@ -1244,6 +1245,20 @@ def test_install_nvidia_drivers_already_installed( mock_check_output.assert_not_called() +def test_install_nvidia_drivers_nouveau_installed(mock_path, nvidia_driver_strategy, mock_apt_lib): + nvidia_version = mock.MagicMock() + nvidia_version.exists.return_value = False + mock_path.return_value = nvidia_version + mocked_open = mock.mock_open(read_data="nouveau") + + with mock.patch("builtins.open", mocked_open): + with pytest.raises(ResourceInstallationError): + nvidia_driver_strategy.install() + + mock_apt_lib.add_package.assert_not_called() + mocked_open.assert_called_once_with("/proc/modules", encoding="utf-8") + + def test_install_nvidia_drivers_subprocess_exception( mock_path, mock_check_output, mock_apt_lib, nvidia_driver_strategy ):