From bba50008f107d838744569b58b255039d30e7a7e Mon Sep 17 00:00:00 2001
From: Simo Tuomisto
Date: Mon, 7 Sep 2020 15:20:48 +0300
Subject: [PATCH] Add functionality for enabling nvidia performance counters

Add the role variable `cuda_enable_perf_counters` (default: false).

When the variable is true, the role renders templates/nvidia.conf.j2 to
/etc/modprobe.d/nvidia.conf (mode 0644). The rendered file passes
NVreg_RestrictProfilingToAdminUsers=0 to the nvidia kernel module, which
is what the task name refers to as enabling performance counters for all
users.

This patch only writes the modprobe configuration; it does not reload the
nvidia module itself. Presumably the option takes effect the next time
the module is loaded -- confirm against the restart handling that follows
this task in tasks/main.yml.

---
Review notes (this section is ignored by git am):
 - Line breaks and indentation were restored from a whitespace-collapsed
   copy of the patch; verify the context-line indentation against the
   target tree before applying.
 - `mode` is now the quoted string "0644" and the new default uses the
   canonical lowercase boolean `false`. The blob hashes on the `index`
   lines were not regenerated after these two edits.

 defaults/main.yml        | 2 ++
 tasks/main.yml           | 7 +++++++
 templates/nvidia.conf.j2 | 1 +
 3 files changed, 10 insertions(+)
 create mode 100644 templates/nvidia.conf.j2

diff --git a/defaults/main.yml b/defaults/main.yml
index a09b830..95d35a3 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -20,4 +20,6 @@ cuda_bash_cuda_root: "/usr/local/cuda"
 cuda_bash_cuda_inc_dir: "/usr/local/cuda/bin"
 cuda_bash_cpath: "/usr/local/cuda/include"
 
+cuda_enable_perf_counters: false
+
 # vim:ft=ansible:
diff --git a/tasks/main.yml b/tasks/main.yml
index 64e4adb..28625de 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -35,6 +35,13 @@
   - include_tasks: cuda_init.yml
     when: cuda_init
 
+  - name: Enable performance counters for all users via modprobe
+    template:
+      src: nvidia.conf.j2
+      dest: /etc/modprobe.d/nvidia.conf
+      mode: "0644"
+    when: cuda_enable_perf_counters
+
   # This is here because if we in the same playbook try to start slurmd without
   # having run the cuda_init.sh script then slurmd doesn't start and the play fails.
   # todo: reload nvidia modules/etc instead of restart
diff --git a/templates/nvidia.conf.j2 b/templates/nvidia.conf.j2
new file mode 100644
index 0000000..aa6e3f4
--- /dev/null
+++ b/templates/nvidia.conf.j2
@@ -0,0 +1 @@
+options nvidia "NVreg_RestrictProfilingToAdminUsers=0"