From db1509a79190fa0e5ce402a120c1c4215f93afd1 Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Tue, 6 Apr 2021 12:58:01 +0300 Subject: [PATCH 1/6] Add support for installing cuda via runfile --- .yamllint | 33 ++++++++++++++++++++++++ defaults/main.yml | 5 ++++ files/blacklist-nouveau.conf | 2 ++ molecule/default/INSTALL.rst | 22 ++++++++++++++++ molecule/default/converge.yml | 7 +++++ molecule/default/molecule.yml | 48 +++++++++++++++++++++++++++++++++++ molecule/default/verify.yml | 13 ++++++++++ tasks/install_runfile.yml | 43 +++++++++++++++++++++++++++++++ tasks/main.yml | 8 ++++-- vars/centos-7.yml | 8 +++++- 10 files changed, 186 insertions(+), 3 deletions(-) create mode 100644 .yamllint create mode 100644 files/blacklist-nouveau.conf create mode 100644 molecule/default/INSTALL.rst create mode 100644 molecule/default/converge.yml create mode 100644 molecule/default/molecule.yml create mode 100644 molecule/default/verify.yml create mode 100644 tasks/install_runfile.yml diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..8827676 --- /dev/null +++ b/.yamllint @@ -0,0 +1,33 @@ +--- +# Based on ansible-lint config +extends: default + +rules: + braces: + max-spaces-inside: 1 + level: error + brackets: + max-spaces-inside: 1 + level: error + colons: + max-spaces-after: -1 + level: error + commas: + max-spaces-after: -1 + level: error + comments: disable + comments-indentation: disable + document-start: disable + empty-lines: + max: 3 + level: error + hyphens: + level: error + indentation: disable + key-duplicates: enable + line-length: disable + new-line-at-end-of-file: disable + new-lines: + type: unix + trailing-spaces: disable + truthy: disable diff --git a/defaults/main.yml b/defaults/main.yml index 95d35a3..c0ac05c 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -7,6 +7,11 @@ cuda_repo_url: "http://developer.download.nvidia.com/compute/cuda/repos/" cuda_rpm_key_path: /etc/rpm/nvidia_packaging_key.asc cuda_packages: - cuda +cuda_use_runfile: False +cuda_runfile_url: "https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run" +cuda_runfile_driver: True +cuda_runfile_toolkit: True +cuda_runfile_remove: True cuda_restart_node_on_install: True cuda_init: True cuda_init_restart_service: True diff --git a/files/blacklist-nouveau.conf b/files/blacklist-nouveau.conf new file mode 100644 index 0000000..c9b9bfc --- /dev/null +++ b/files/blacklist-nouveau.conf @@ -0,0 +1,2 @@ +blacklist nouveau +options nouveau modeset=0 diff --git a/molecule/default/INSTALL.rst b/molecule/default/INSTALL.rst new file mode 100644 index 0000000..d926ca2 --- /dev/null +++ b/molecule/default/INSTALL.rst @@ -0,0 +1,22 @@ +******* +Docker driver installation guide +******* + +Requirements +============ + +* Docker Engine + +Install +======= + +Please refer to the `Virtual environment`_ documentation for installation best +practices. If not using a virtual environment, please consider passing the +widely recommended `'--user' flag`_ when invoking ``pip``. + +.. _Virtual environment: https://virtualenv.pypa.io/en/latest/ +.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site + +.. code-block:: bash + + $ python3 -m pip install 'molecule[docker]' diff --git a/molecule/default/converge.yml b/molecule/default/converge.yml new file mode 100644 index 0000000..a878209 --- /dev/null +++ b/molecule/default/converge.yml @@ -0,0 +1,7 @@ +--- +- name: Converge ansible-role-cuda + hosts: all + tasks: + - name: "Include ansible-role-cuda" + include_role: + name: "ansible-role-cuda" diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml new file mode 100644 index 0000000..c8f99cd --- /dev/null +++ b/molecule/default/molecule.yml @@ -0,0 +1,48 @@ +--- +dependency: + name: galaxy +driver: + name: docker +platforms: + - name: centos7_cuda_repo + image: docker.io/pycontribs/centos:7 + pre_build_image: true + tmpfs: + - /run + volumes: + - /tmp/centos7_cuda_repo:/tmp:rw + - name: centos7_cuda_run + image: docker.io/pycontribs/centos:7 + pre_build_image: true + tmpfs: + - /run + volumes: + - /tmp/centos7_cuda_run:/tmp:rw +provisioner: + name: ansible + inventory: + group_vars: + all: + gpu: True + host_vars: + centos7_cuda_repo: + cuda_packages: + - cuda-libraries-11-2 + cuda_restart_node_on_install: False + cuda_init: False + cuda_init_restart_service: False + centos7_cuda_run: + cuda_use_runfile: True + cuda_runfile_driver: False # Docker has different kernel than images kernel-headers + cuda_runfile_remove: False # Keep the installer in /tmp/centos7_cuda_run for multiple runs + cuda_restart_node_on_install: False + cuda_init: False + cuda_init_restart_service: False + +verifier: + name: ansible +lint: | + set -e + yamllint . + ansible-lint + flake8 diff --git a/molecule/default/verify.yml b/molecule/default/verify.yml new file mode 100644 index 0000000..0bc0c18 --- /dev/null +++ b/molecule/default/verify.yml @@ -0,0 +1,13 @@ +--- +# This is an example playbook to execute Ansible tests. + +- name: Verify + hosts: all + tasks: + - name: Check that CUDA has been installed + stat: + path: /usr/local/cuda-11.2 + register: cuda_path_check + - name: Verify that CUDA folder exists + assert: + that: cuda_path_check.stat.exists diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml new file mode 100644 index 0000000..dd9b692 --- /dev/null +++ b/tasks/install_runfile.yml @@ -0,0 +1,43 @@ +--- + +- name: "Ensure kernel headers are installed (yum)" + yum: + name: "{{ cuda_runfile_packages }}" + state: present + when: ansible_pkg_mgr in ["yum", "dnf"] + +- name: "Ensure kernel headers are installed (apt)" + yum: + name: + - linux-headers-generic + - build-essential + state: present + when: ansible_pkg_mgr == "apt" + +- name: "Disable nouveau" + copy: + src: blacklist-nouveau.conf + dest: /etc/modprobe.d/blacklist-nouveau.conf + +- name: "Register installer name" + set_fact: + cuda_runfile_sh: "{{ cuda_runfile_url | basename }}" + +- name: "Download runfile" + get_url: + url: "{{ cuda_runfile_url }}" + dest: "/tmp/{{ cuda_runfile_sh }}" + +- name: 'Setting runfile arguments' + set_fact: + runfile_args: "--silent {{ '--driver' if cuda_runfile_driver else '' }} {{ '--toolkit' if cuda_runfile_toolkit else '' }}" + +- name: "Run installer" + command: bash /tmp/{{ cuda_runfile_sh }} {{ runfile_args }} + register: cuda_install_out + +- name: 'Remove installer after successful install' + file: + path: /tmp/{{ cuda_runfile_sh }} + state: absent + when: cuda_install_out.rc == 0 and cuda_runfile_remove diff --git a/tasks/main.yml b/tasks/main.yml index 66760f6..436dbf7 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -10,10 +10,13 @@ - block: - include_tasks: configure_yum.yml - when: ansible_pkg_mgr in ['yum', 'dnf'] + when: ansible_pkg_mgr in ['yum', 'dnf'] and not cuda_use_runfile - include_tasks: configure_apt.yml - when: ansible_pkg_mgr == 'apt' + when: ansible_pkg_mgr == 'apt' and not cuda_use_runfile + + - include_tasks: install_runfile.yml + when: cuda_use_runfile - name: Install CUDA packages (1.5-2GB download, also restarts if cuda_restart_node_on_install is set to True) package: @@ -21,6 +24,7 @@ state: present with_items: "{{ cuda_packages }}" register: cuda_packages_installation + when: not cuda_use_runfile notify: - ZZ CUDA Restart server - ZZ CUDA Wait for server to restart diff --git a/vars/centos-7.yml b/vars/centos-7.yml index b331a96..5618557 100644 --- a/vars/centos-7.yml +++ b/vars/centos-7.yml @@ -1,4 +1,10 @@ --- cuda_repo_subfolder: rhel7 -# vim:ft=ansible: \ No newline at end of file +cuda_runfile_packages: + - kernel-devel + - "@Development tools" + - which + + +# vim:ft=ansible: From fbe9a83b68c07d10ee5b61062f2b3576a0251b9d Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Wed, 7 Apr 2021 19:37:41 +0300 Subject: [PATCH 2/6] Different NVIDIA driver installation method for ohpc system --- molecule/default/molecule.yml | 3 +- molecule/default/verify.yml | 12 +++++- tasks/install_runfile.yml | 73 ++++++++++++++++++++++++++++++----- 3 files changed, 76 insertions(+), 12 deletions(-) diff --git a/molecule/default/molecule.yml b/molecule/default/molecule.yml index c8f99cd..9b4ecda 100644 --- a/molecule/default/molecule.yml +++ b/molecule/default/molecule.yml @@ -24,6 +24,7 @@ provisioner: group_vars: all: gpu: True + cuda_driver_kernel_version: 3.10.0-1160.21.1.el7.x86_64 # The kernel to check kernel modules against host_vars: centos7_cuda_repo: cuda_packages: @@ -33,7 +34,7 @@ provisioner: cuda_init_restart_service: False centos7_cuda_run: cuda_use_runfile: True - cuda_runfile_driver: False # Docker has different kernel than images kernel-headers + cuda_runfile_driver: True # Docker has different kernel than images kernel-headers cuda_runfile_remove: False # Keep the installer in /tmp/centos7_cuda_run for multiple runs cuda_restart_node_on_install: False cuda_init: False diff --git a/molecule/default/verify.yml b/molecule/default/verify.yml index 0bc0c18..9521d16 100644 --- a/molecule/default/verify.yml +++ b/molecule/default/verify.yml @@ -1,7 +1,7 @@ --- # This is an example playbook to execute Ansible tests. -- name: Verify +- name: Verify CUDA toolkit installation hosts: all tasks: - name: Check that CUDA has been installed @@ -11,3 +11,13 @@ - name: Verify that CUDA folder exists assert: that: cuda_path_check.stat.exists +- name: Verify NVIDIA driver kernel modules + hosts: centos7_cuda_run + tasks: + - name: Check that NVIDIA kernel module has been installed + stat: + path: /lib/modules/{{ cuda_driver_kernel_version }}/video/nvidia.ko + register: nvidia_module_file + - name: Verify that kernel module exists + assert: + that: nvidia_module_file.stat.exists diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml index dd9b692..c095ba5 100644 --- a/tasks/install_runfile.yml +++ b/tasks/install_runfile.yml @@ -23,21 +23,74 @@ set_fact: cuda_runfile_sh: "{{ cuda_runfile_url | basename }}" +- name: 'Determine running kernel' + command: uname -r + register: cuda_driver_kernel_running + +- name: 'Determine kernel version' + set_fact: + cuda_driver_kernel_version: "{{ cuda_driver_kernel_version | default(cuda_driver_kernel_running.stdout, true) }}" + +- name: 'Check NVIDIA kernel module' + stat: + path: /lib/modules/{{ cuda_driver_kernel_version }}/video/nvidia.ko + register: cuda_driver_kernel_module + +- name: "Check CUDA toolkit path" + stat: + path: /usr/local/cuda + register: cuda_toolkit_path + +- name: 'Determine if driver and toolkit are installed' + set_fact: + cuda_driver_installed: "{{ cuda_driver_kernel_module.stat.exists }}" + cuda_toolkit_installed: "{{ cuda_toolkit_path.stat.exists }}" + +- name: "Create temporary directory for runfile" + file: + path: /tmp/cuda_runfile + state: directory + - name: "Download runfile" get_url: url: "{{ cuda_runfile_url }}" - dest: "/tmp/{{ cuda_runfile_sh }}" + dest: "/tmp/cuda_runfile/{{ cuda_runfile_sh }}" + when: (cuda_runfile_toolkit and not cuda_toolkit_installed) or + (cuda_runfile_driver and not cuda_driver_installed) -- name: 'Setting runfile arguments' - set_fact: - runfile_args: "--silent {{ '--driver' if cuda_runfile_driver else '' }} {{ '--toolkit' if cuda_runfile_toolkit else '' }}" -- name: "Run installer" - command: bash /tmp/{{ cuda_runfile_sh }} {{ runfile_args }} - register: cuda_install_out +- name: "Run installer for toolkit" + command: bash /tmp/cuda_runfile/{{ cuda_runfile_sh }} --silent --toolkit + register: cuda_toolkit_install_out + when: cuda_runfile_toolkit and not cuda_toolkit_installed + +- name: 'Install driver' + block: + + - name: 'Extract installer for driver installation' + command: bash /tmp/cuda_runfile/{{ cuda_runfile_sh }} --extract=/tmp/cuda_runfile + + - name: 'Find NVIDIA runtime' + find: + paths: /tmp/cuda_runfile + patterns: 'NVIDIA*.run' + register: cuda_driver_runfile_find + + - name: 'Set NVIDIA runfile path' + set_fact: + cuda_driver_runfile: "{{ cuda_driver_runfile_find.files[0].path }}" + + - name: 'Print variables of interest' + debug: + msg: "{{ cuda_driver_runfile }} {{ cuda_driver_kernel_version }} {{ cuda_driver_kernel_running.stdout }}" + + - name: 'Install driver' + command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} + + when: cuda_runfile_driver and not cuda_driver_installed -- name: 'Remove installer after successful install' +- name: 'Remove installer' file: - path: /tmp/{{ cuda_runfile_sh }} + path: /tmp/cuda_runfile state: absent - when: cuda_install_out.rc == 0 and cuda_runfile_remove + when: and cuda_runfile_remove From 73eda0fc47a5c066f3df9d278c5dfc3f067f54a4 Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Wed, 7 Apr 2021 20:28:49 +0300 Subject: [PATCH 3/6] Changed debug statement --- tasks/install_runfile.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml index c095ba5..e4b4de8 100644 --- a/tasks/install_runfile.yml +++ b/tasks/install_runfile.yml @@ -80,9 +80,9 @@ set_fact: cuda_driver_runfile: "{{ cuda_driver_runfile_find.files[0].path }}" - - name: 'Print variables of interest' + - name: 'Print information about driver' debug: - msg: "{{ cuda_driver_runfile }} {{ cuda_driver_kernel_version }} {{ cuda_driver_kernel_running.stdout }}" + msg: "Building driver {{ cuda_driver_runfile }} for kernel {{ cuda_driver_kernel_version }}" - name: 'Install driver' command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} From b32cba867ed7dcef4aaa4a1e33b82d9f67a0849d Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Wed, 7 Apr 2021 21:51:51 +0300 Subject: [PATCH 4/6] Allow disabling nvidia-drm-module, copying runfile from files and enabling nvidia-persistenced-service --- defaults/main.yml | 2 ++ files/nvidia-persistenced.service | 14 +++++++++++++ tasks/install_runfile.yml | 34 +++++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 files/nvidia-persistenced.service diff --git a/defaults/main.yml b/defaults/main.yml index c0ac05c..9d6f6bb 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -11,7 +11,9 @@ cuda_use_runfile: False cuda_runfile_url: "https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run" cuda_runfile_driver: True cuda_runfile_toolkit: True +cuda_runfile_download: True cuda_runfile_remove: True +cuda_runfile_disable_nvidia_drm: False cuda_restart_node_on_install: True cuda_init: True cuda_init_restart_service: True diff --git a/files/nvidia-persistenced.service b/files/nvidia-persistenced.service new file mode 100644 index 0000000..8a461ed --- /dev/null +++ b/files/nvidia-persistenced.service @@ -0,0 +1,14 @@ +[Unit] +Description=NVIDIA Persistence Daemon +After=syslog.target + +[Service] +Type=forking +PIDFile=/var/run/nvidia-persistenced/nvidia-persistenced.pid +Restart=always +ExecStart=/usr/bin/nvidia-persistenced --verbose +ExecStopPost=/bin/rm -rf /var/run/nvidia-persistenced/* +TimeoutSec=300 + +[Install] +WantedBy=multi-user.target diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml index e4b4de8..7ef052a 100644 --- a/tasks/install_runfile.yml +++ b/tasks/install_runfile.yml @@ -51,14 +51,24 @@ path: /tmp/cuda_runfile state: directory -- name: "Download runfile" - get_url: - url: "{{ cuda_runfile_url }}" - dest: "/tmp/cuda_runfile/{{ cuda_runfile_sh }}" +- name: "Obtain runfile" + block: + + - name: "Copy pre-downloaded runfile" + copy: + src: "{{ cuda_runfile_sh }}" + dest: /tmp/cuda_runfile + when: not cuda_runfile_download + + - name: "Download runfile" + get_url: + url: "{{ cuda_runfile_url }}" + dest: "/tmp/cuda_runfile/{{ cuda_runfile_sh }}" + when: cuda_runfile_download + when: (cuda_runfile_toolkit and not cuda_toolkit_installed) or (cuda_runfile_driver and not cuda_driver_installed) - - name: "Run installer for toolkit" command: bash /tmp/cuda_runfile/{{ cuda_runfile_sh }} --silent --toolkit register: cuda_toolkit_install_out @@ -85,12 +95,20 @@ msg: "Building driver {{ cuda_driver_runfile }} for kernel {{ cuda_driver_kernel_version }}" - name: 'Install driver' - command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} + command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} {{ '--no-drm' if cuda_runfile_disable_nvidia_drm else '' }} + + when: cuda_runfile_driver and not cuda_driver_installed + + - name: "Install nvidia-persistenced systemd-file" + copy: + src: files/nvidia-persistenced.service + dest: /etc/systemd/system/nvidia-persistenced.service + when: cuda_init_persistence_mode | bool when: cuda_runfile_driver and not cuda_driver_installed -- name: 'Remove installer' +- name: "Remove installer" file: path: /tmp/cuda_runfile state: absent - when: and cuda_runfile_remove + when: cuda_runfile_remove From ed5890b0830c337d39caac3e3e0c1b3c8800f264 Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Wed, 7 Apr 2021 21:57:37 +0300 Subject: [PATCH 5/6] Fixed string names --- tasks/install_runfile.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml index 7ef052a..5ce1d47 100644 --- a/tasks/install_runfile.yml +++ b/tasks/install_runfile.yml @@ -23,15 +23,15 @@ set_fact: cuda_runfile_sh: "{{ cuda_runfile_url | basename }}" -- name: 'Determine running kernel' +- name: "Determine running kernel" command: uname -r register: cuda_driver_kernel_running -- name: 'Determine kernel version' +- name: "Determine kernel version" set_fact: cuda_driver_kernel_version: "{{ cuda_driver_kernel_version | default(cuda_driver_kernel_running.stdout, true) }}" -- name: 'Check NVIDIA kernel module' +- name: "Check NVIDIA kernel module" stat: path: /lib/modules/{{ cuda_driver_kernel_version }}/video/nvidia.ko register: cuda_driver_kernel_module @@ -41,7 +41,7 @@ path: /usr/local/cuda register: cuda_toolkit_path -- name: 'Determine if driver and toolkit are installed' +- name: "Determine if driver and toolkit are installed" set_fact: cuda_driver_installed: "{{ cuda_driver_kernel_module.stat.exists }}" cuda_toolkit_installed: "{{ cuda_toolkit_path.stat.exists }}" @@ -74,28 +74,28 @@ register: cuda_toolkit_install_out when: cuda_runfile_toolkit and not cuda_toolkit_installed -- name: 'Install driver' +- name: "Install driver" block: - - name: 'Extract installer for driver installation' + - name: "Extract installer for driver installation" command: bash /tmp/cuda_runfile/{{ cuda_runfile_sh }} --extract=/tmp/cuda_runfile - - name: 'Find NVIDIA runtime' + - name: "Find NVIDIA runtime" find: paths: /tmp/cuda_runfile - patterns: 'NVIDIA*.run' + patterns: "NVIDIA*.run" register: cuda_driver_runfile_find - - name: 'Set NVIDIA runfile path' + - name: "Set NVIDIA runfile path" set_fact: cuda_driver_runfile: "{{ cuda_driver_runfile_find.files[0].path }}" - - name: 'Print information about driver' + - name: "Print information about driver" debug: msg: "Building driver {{ cuda_driver_runfile }} for kernel {{ cuda_driver_kernel_version }}" - - name: 'Install driver' - command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} {{ '--no-drm' if cuda_runfile_disable_nvidia_drm else '' }} + - name: "Install driver" + command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} {{ "--no-drm" if cuda_runfile_disable_nvidia_drm else "" }} when: cuda_runfile_driver and not cuda_driver_installed From f1fefb7014b02a33ade4dd74a33c357550297647 Mon Sep 17 00:00:00 2001 From: Simo Tuomisto Date: Thu, 8 Apr 2021 10:48:57 +0300 Subject: [PATCH 6/6] Added better kernel searching for idempotency --- molecule/default/verify.yml | 9 +++++---- tasks/install_runfile.yml | 24 +++++++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/molecule/default/verify.yml b/molecule/default/verify.yml index 9521d16..e321c63 100644 --- a/molecule/default/verify.yml +++ b/molecule/default/verify.yml @@ -15,9 +15,10 @@ hosts: centos7_cuda_run tasks: - name: Check that NVIDIA kernel module has been installed - stat: - path: /lib/modules/{{ cuda_driver_kernel_version }}/video/nvidia.ko - register: nvidia_module_file + find: + path: /lib/modules/{{ cuda_driver_kernel_version }} + patterns: nvidia.ko + register: nvidia_module_find - name: Verify that kernel module exists assert: - that: nvidia_module_file.stat.exists + that: nvidia_module_find.matched > 0 diff --git a/tasks/install_runfile.yml b/tasks/install_runfile.yml index 5ce1d47..4ede7fc 100644 --- a/tasks/install_runfile.yml +++ b/tasks/install_runfile.yml @@ -32,9 +32,11 @@ cuda_driver_kernel_version: "{{ cuda_driver_kernel_version | default(cuda_driver_kernel_running.stdout, true) }}" - name: "Check NVIDIA kernel module" - stat: - path: /lib/modules/{{ cuda_driver_kernel_version }}/video/nvidia.ko - register: cuda_driver_kernel_module + find: + path: "/lib/modules/{{ cuda_driver_kernel_version }}" + patterns: nvidia.ko + recurse: true + register: cuda_driver_kernel_module_find - name: "Check CUDA toolkit path" stat: @@ -43,9 +45,15 @@ - name: "Determine if driver and toolkit are installed" set_fact: - cuda_driver_installed: "{{ cuda_driver_kernel_module.stat.exists }}" + cuda_driver_installed: "{{ cuda_driver_kernel_module_find.matched > 0 }}" cuda_toolkit_installed: "{{ cuda_toolkit_path.stat.exists }}" +- name: "Print information about installed features" + debug: + msg: + - "Driver installed: {{ cuda_driver_installed }}" + - "Toolkit installed: {{ cuda_toolkit_installed }}" + - name: "Create temporary directory for runfile" file: path: /tmp/cuda_runfile @@ -95,9 +103,11 @@ msg: "Building driver {{ cuda_driver_runfile }} for kernel {{ cuda_driver_kernel_version }}" - name: "Install driver" - command: bash {{ cuda_driver_runfile }} --silent --kernel-name={{ cuda_driver_kernel_version }} --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} {{ "--no-drm" if cuda_runfile_disable_nvidia_drm else "" }} - - when: cuda_runfile_driver and not cuda_driver_installed + command: > + bash {{ cuda_driver_runfile }} --silent + --kernel-name={{ cuda_driver_kernel_version }} + --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }} + {{ "--no-drm" if cuda_runfile_disable_nvidia_drm else "" }} - name: "Install nvidia-persistenced systemd-file" copy: