-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #37 from fgci-org/cuda-run-installation
Cuda runfile installation
- Loading branch information
Showing
11 changed files
with
295 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
---
# yamllint configuration for this role (derived from the ansible-lint config).
extends: default

rules:
  # Flow collections: allow at most one space inside { } and [ ].
  braces:
    max-spaces-inside: 1
    level: error
  brackets:
    max-spaces-inside: 1
    level: error

  # A value of -1 switches off the limit on spaces after ':' and ','.
  colons:
    max-spaces-after: -1
    level: error
  commas:
    max-spaces-after: -1
    level: error

  # Comment formatting and the leading '---' marker are not enforced.
  comments: disable
  comments-indentation: disable
  document-start: disable

  # Up to three consecutive blank lines are tolerated.
  empty-lines:
    max: 3
    level: error
  hyphens:
    level: error
  indentation: disable
  key-duplicates: enable
  line-length: disable
  new-line-at-end-of-file: disable

  # Files must use LF line endings.
  new-lines:
    type: unix
  trailing-spaces: disable
  truthy: disable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# modprobe.d snippet copied to /etc/modprobe.d/blacklist-nouveau.conf by the
# "Disable nouveau" task.
# Do not load the nouveau module automatically.
blacklist nouveau
# Pass modeset=0 to nouveau if it is loaded anyway.
options nouveau modeset=0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# systemd unit for the NVIDIA persistence daemon.
[Unit]
Description=NVIDIA Persistence Daemon
After=syslog.target

[Service]
# The daemon forks into the background and records its PID in PIDFile.
Type=forking
PIDFile=/var/run/nvidia-persistenced/nvidia-persistenced.pid
Restart=always
ExecStart=/usr/bin/nvidia-persistenced --verbose
# systemd does not expand shell globs in Exec* command lines, so a bare
# "rm -rf .../*" would look for a file literally named "*" and silently do
# nothing. Run the cleanup through a shell so the wildcard is expanded.
ExecStopPost=/bin/sh -c '/bin/rm -rf /var/run/nvidia-persistenced/*'
TimeoutSec=300

[Install]
WantedBy=multi-user.target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
********************************
Docker driver installation guide
********************************
|
||
Requirements | ||
============ | ||
|
||
* Docker Engine | ||
|
||
Install | ||
======= | ||
|
||
Please refer to the `Virtual environment`_ documentation for installation best | ||
practices. If not using a virtual environment, please consider passing the | ||
widely recommended `'--user' flag`_ when invoking ``pip``. | ||
|
||
.. _Virtual environment: https://virtualenv.pypa.io/en/latest/ | ||
.. _'--user' flag: https://packaging.python.org/tutorials/installing-packages/#installing-to-the-user-site | ||
|
||
.. code-block:: bash

    $ python3 -m pip install 'molecule[docker]'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
---
# Applies ansible-role-cuda to every host in the inventory.
- name: Converge ansible-role-cuda
  hosts: all
  tasks:
    - name: Include ansible-role-cuda
      include_role:
        name: ansible-role-cuda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
---
# Molecule scenario with two CentOS 7 containers: one exercising the
# repository-based install, one exercising the runfile-based install.
dependency:
  name: galaxy

driver:
  name: docker

platforms:
  - name: centos7_cuda_repo
    image: docker.io/pycontribs/centos:7
    pre_build_image: true
    tmpfs:
      - /run
    volumes:
      - /tmp/centos7_cuda_repo:/tmp:rw

  - name: centos7_cuda_run
    image: docker.io/pycontribs/centos:7
    pre_build_image: true
    tmpfs:
      - /run
    volumes:
      - /tmp/centos7_cuda_run:/tmp:rw

provisioner:
  name: ansible
  inventory:
    group_vars:
      all:
        gpu: true
        # The kernel to check kernel modules against
        cuda_driver_kernel_version: 3.10.0-1160.21.1.el7.x86_64
    host_vars:
      centos7_cuda_repo:
        cuda_packages:
          - cuda-libraries-11-2
        cuda_restart_node_on_install: false
        cuda_init: false
        cuda_init_restart_service: false
      centos7_cuda_run:
        cuda_use_runfile: true
        # Docker has a different kernel than the image's kernel-headers
        cuda_runfile_driver: true
        # Keep the installer in /tmp/centos7_cuda_run for multiple runs
        cuda_runfile_remove: false
        cuda_restart_node_on_install: false
        cuda_init: false
        cuda_init_restart_service: false

verifier:
  name: ansible

# Lint commands; "set -e" aborts on the first failing linter.
lint: |
  set -e
  yamllint .
  ansible-lint
  flake8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
--- | ||
# This is an example playbook to execute Ansible tests. | ||
|
||
# Play 1: every host must have the versioned CUDA toolkit directory.
- name: Verify CUDA toolkit installation
  hosts: all
  tasks:
    # Record whether /usr/local/cuda-11.2 is present.
    - name: Check that CUDA has been installed
      stat:
        path: /usr/local/cuda-11.2
      register: cuda_path_check

    # Fail the verification when the directory is missing.
    - name: Verify that CUDA folder exists
      assert:
        that:
          - cuda_path_check.stat.exists
# Play 2: the runfile-installed host must have an nvidia.ko built for the
# kernel named in cuda_driver_kernel_version.
- name: Verify NVIDIA driver kernel modules
  hosts: centos7_cuda_run
  tasks:
    # Search the whole module tree: without "recurse" the find module only
    # inspects the top level of the directory. This mirrors the role's own
    # "Check NVIDIA kernel module" task, which also searches recursively.
    - name: Check that NVIDIA kernel module has been installed
      find:
        paths: "/lib/modules/{{ cuda_driver_kernel_version }}"
        patterns: nvidia.ko
        recurse: true
      register: nvidia_module_find

    - name: Verify that kernel module exists
      assert:
        that:
          - nvidia_module_find.matched > 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
--- | ||
|
||
# yum/dnf hosts: install the packages listed in cuda_runfile_packages.
- name: "Ensure kernel headers are installed (yum)"
  yum:
    state: present
    name: "{{ cuda_runfile_packages }}"
  when: ansible_pkg_mgr in ['yum', 'dnf']
|
||
# apt hosts: install kernel headers and the compiler toolchain.
# This must use the apt module; the yum module cannot manage packages on
# hosts whose package manager is apt.
- name: "Ensure kernel headers are installed (apt)"
  apt:
    name:
      - linux-headers-generic
      - build-essential
    state: present
  when: ansible_pkg_mgr == "apt"
|
||
# Install the modprobe.d snippet that blacklists the nouveau module.
- name: "Disable nouveau"
  copy:
    dest: /etc/modprobe.d/blacklist-nouveau.conf
    src: blacklist-nouveau.conf
|
||
# The installer file name is the last path component of cuda_runfile_url.
- name: "Register installer name"
  set_fact:
    cuda_runfile_sh: >-
      {{ cuda_runfile_url | basename }}
|
||
# Query the running kernel release; used as the fallback kernel version below.
- name: "Determine running kernel"
  command: uname -r
  register: cuda_driver_kernel_running
  # Read-only query: never report this task as a change.
  changed_when: false
|
||
# Keep a provided cuda_driver_kernel_version; fall back to the output of
# "uname -r" when it is undefined or empty (second default() argument is true).
- name: "Determine kernel version"
  set_fact:
    cuda_driver_kernel_version: >-
      {{ cuda_driver_kernel_version | default(cuda_driver_kernel_running.stdout, true) }}
|
||
# Recursively look for nvidia.ko in the module tree of the selected kernel.
- name: "Check NVIDIA kernel module"
  find:
    paths: "/lib/modules/{{ cuda_driver_kernel_version }}"
    recurse: true
    patterns: nvidia.ko
  register: cuda_driver_kernel_module_find
|
||
# Stat /usr/local/cuda; its existence is treated as "toolkit installed" below.
- name: "Check CUDA toolkit path"
  stat:
    path: "/usr/local/cuda"
  register: cuda_toolkit_path
|
||
# Turn the two probe results into facts that gate the install steps:
#   toolkit: /usr/local/cuda exists
#   driver:  at least one nvidia.ko was found
- name: "Determine if driver and toolkit are installed"
  set_fact:
    cuda_toolkit_installed: "{{ cuda_toolkit_path.stat.exists }}"
    cuda_driver_installed: "{{ cuda_driver_kernel_module_find.matched > 0 }}"
|
||
# Report the detection results in the play output.
- name: "Print information about installed features"
  debug:
    msg: [
      "Driver installed: {{ cuda_driver_installed }}",
      "Toolkit installed: {{ cuda_toolkit_installed }}",
    ]
|
||
# Working directory that holds the runfile and its extracted contents.
- name: "Create temporary directory for runfile"
  file:
    state: directory
    path: "/tmp/cuda_runfile"
|
||
# Fetch the installer only when something still has to be installed from it.
- name: "Obtain runfile"
  when: >-
    (cuda_runfile_toolkit and not cuda_toolkit_installed) or
    (cuda_runfile_driver and not cuda_driver_installed)
  block:
    # cuda_runfile_download false: copy the installer from the controller.
    - name: "Copy pre-downloaded runfile"
      when: not cuda_runfile_download
      copy:
        src: "{{ cuda_runfile_sh }}"
        dest: /tmp/cuda_runfile

    # cuda_runfile_download true: download the installer from cuda_runfile_url.
    - name: "Download runfile"
      when: cuda_runfile_download
      get_url:
        url: "{{ cuda_runfile_url }}"
        dest: "/tmp/cuda_runfile/{{ cuda_runfile_sh }}"
|
||
# Run the runfile with --silent --toolkit, unless the toolkit is already present.
- name: "Run installer for toolkit"
  when: cuda_runfile_toolkit and not cuda_toolkit_installed
  command: >-
    bash /tmp/cuda_runfile/{{ cuda_runfile_sh }}
    --silent --toolkit
  register: cuda_toolkit_install_out
|
||
# Build and install the NVIDIA driver from the runfile extracted out of the
# CUDA installer. Skipped when a kernel module was already found.
- name: "Install driver"
  when: cuda_runfile_driver and not cuda_driver_installed
  block:

    - name: "Extract installer for driver installation"
      command: bash /tmp/cuda_runfile/{{ cuda_runfile_sh }} --extract=/tmp/cuda_runfile

    - name: "Find NVIDIA runtime"
      find:
        paths: /tmp/cuda_runfile
        patterns: "NVIDIA*.run"
      register: cuda_driver_runfile_find

    # Fail with a clear message instead of an index error on files[0] when
    # the extraction produced no driver runfile.
    - name: "Ensure an NVIDIA driver runfile was extracted"
      assert:
        that:
          - cuda_driver_runfile_find.matched > 0
        msg: "No NVIDIA*.run file found in /tmp/cuda_runfile after extracting {{ cuda_runfile_sh }}"

    - name: "Set NVIDIA runfile path"
      set_fact:
        cuda_driver_runfile: "{{ cuda_driver_runfile_find.files[0].path }}"

    - name: "Print information about driver"
      debug:
        msg: "Building driver {{ cuda_driver_runfile }} for kernel {{ cuda_driver_kernel_version }}"

    # --kernel-name and --kernel-source-path point the build at the selected
    # kernel version rather than whatever kernel is running.
    - name: "Install driver"
      command: >
        bash {{ cuda_driver_runfile }} --silent
        --kernel-name={{ cuda_driver_kernel_version }}
        --kernel-source-path=/usr/src/kernels/{{ cuda_driver_kernel_version }}
        {{ "--no-drm" if cuda_runfile_disable_nvidia_drm else "" }}

    - name: "Install nvidia-persistenced systemd-file"
      copy:
        src: files/nvidia-persistenced.service
        dest: /etc/systemd/system/nvidia-persistenced.service
      when: cuda_init_persistence_mode | bool
|
||
# Delete the working directory, including the runfile, when cuda_runfile_remove is set.
- name: "Remove installer"
  when: cuda_runfile_remove
  file:
    state: absent
    path: "/tmp/cuda_runfile"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,10 @@ | ||
--- | ||
cuda_repo_subfolder: rhel7 | ||
|
||
# vim:ft=ansible: | ||
# Packages installed by the "Ensure kernel headers are installed (yum)" task.
cuda_runfile_packages:
  - kernel-devel
  - '@Development tools'
  - which
|
||
|
||
# vim:ft=ansible: |