From f169fd90aca49afe414b9eb7436de18adf8f3374 Mon Sep 17 00:00:00 2001 From: arnauW Date: Tue, 17 Jul 2018 14:11:44 +0200 Subject: [PATCH] fix some issues found with ubuntu and ganglia GPU monitoring --- gpu/nvidia/README | 2 +- gpu/nvidia/conf.d/modpython.conf | 8 + gpu/nvidia/ganglia_web.patch | 145 -- .../build/lib/nvidia_smi.py | 455 ----- .../nvidia-ml-py-3.295.00/build/lib/pynvml.py | 903 --------- .../nvidia-ml-py-3.295.00/nvidia_smi.py | 455 ----- gpu/nvidia/nvidia-ml-py-3.295.00/pynvml.py | 903 --------- .../PKG-INFO | 4 +- .../README.txt | 19 +- gpu/nvidia/nvidia-ml-py-7.352.0/nvidia_smi.py | 873 +++++++++ gpu/nvidia/nvidia-ml-py-7.352.0/pynvml.py | 1701 +++++++++++++++++ .../setup.py | 8 +- 12 files changed, 2606 insertions(+), 2870 deletions(-) create mode 100644 gpu/nvidia/conf.d/modpython.conf delete mode 100644 gpu/nvidia/ganglia_web.patch delete mode 100644 gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/nvidia_smi.py delete mode 100644 gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/pynvml.py delete mode 100644 gpu/nvidia/nvidia-ml-py-3.295.00/nvidia_smi.py delete mode 100644 gpu/nvidia/nvidia-ml-py-3.295.00/pynvml.py rename gpu/nvidia/{nvidia-ml-py-3.295.00 => nvidia-ml-py-7.352.0}/PKG-INFO (94%) rename gpu/nvidia/{nvidia-ml-py-3.295.00 => nvidia-ml-py-7.352.0}/README.txt (87%) create mode 100644 gpu/nvidia/nvidia-ml-py-7.352.0/nvidia_smi.py create mode 100644 gpu/nvidia/nvidia-ml-py-7.352.0/pynvml.py rename gpu/nvidia/{nvidia-ml-py-3.295.00 => nvidia-ml-py-7.352.0}/setup.py (89%) diff --git a/gpu/nvidia/README b/gpu/nvidia/README index 60bbaf04..9b44cdeb 100644 --- a/gpu/nvidia/README +++ b/gpu/nvidia/README @@ -8,7 +8,7 @@ Installation instructions: For the latest bindings see: http://pypi.python.org/pypi/nvidia-ml-py/ You can do a site install or place it in {libdir}/ganglia/python_modules * Copy python_modules/nvidia.py to {libdir}/ganglia/python_modules -* Copy conf.d/nvidia.pyconf to /etc/ganglia/conf.d +* Copy conf.d/* to /etc/ganglia/conf.d/ * Copy 
graph.d/* to {ganglia_webroot}/graph.d/ * A demo of what the GPU graphs look like is available here: http://ganglia.ddbj.nig.ac.jp/?c=research+month+gpu+queue&h=t135i&m=load_one&r=hour&s=by+name&hc=4&mc=2 diff --git a/gpu/nvidia/conf.d/modpython.conf b/gpu/nvidia/conf.d/modpython.conf new file mode 100644 index 00000000..d00795d2 --- /dev/null +++ b/gpu/nvidia/conf.d/modpython.conf @@ -0,0 +1,8 @@ +modules { + module { + name = "python_module" + path = "/usr/lib/ganglia/modpython.so" + params = "/usr/lib/ganglia/python_modules/" + } +} +include ('/etc/ganglia/conf.d/*.pyconf') diff --git a/gpu/nvidia/ganglia_web.patch b/gpu/nvidia/ganglia_web.patch deleted file mode 100644 index 78c0a0b3..00000000 --- a/gpu/nvidia/ganglia_web.patch +++ /dev/null @@ -1,145 +0,0 @@ -Index: host_view.php -=================================================================== ---- host_view.php (revision 2612) -+++ host_view.php (working copy) -@@ -17,8 +17,8 @@ - $tpl->assign("node_msg", "This host is down."); - - $cluster_url=rawurlencode($clustername); --$tpl->assign("cluster_url", $cluster_url); --$tpl->assign("graphargs", "h=$hostname&$get_metric_string&st=$cluster[LOCALTIME]"); -+$tpl->assignGlobal("cluster_url", $cluster_url); -+$tpl->assignGlobal("graphargs", "h=$hostname&$get_metric_string&st=$cluster[LOCALTIME]"); - - # For the node view link. 
- $tpl->assign("node_view","./?p=2&c=$cluster_url&h=$hostname"); -@@ -37,6 +37,33 @@ - - foreach ($metrics as $name => $v) - { -+ if ($name == "gpu0_mem_used") { -+ $tpl->newBlock('gpu0_mem'); -+ } -+ if ($name == "gpu1_mem_used") { -+ $tpl->newBlock('gpu1_mem'); -+ } -+ if ($name == "gpu2_mem_used") { -+ $tpl->newBlock('gpu2_mem'); -+ } -+ if ($name == "gpu3_mem_used") { -+ $tpl->newBlock('gpu3_mem'); -+ } -+ if ($name == "gpu0_util") { -+ $tpl->newBlock('gpu_util'); -+ } -+ if ($name == "gpu0_mem_util") { -+ $tpl->newBlock('gpu_mem_util'); -+ } -+ if ($name == "gpu0_core_speed") { -+ $tpl->newBlock('gpu_core_speed'); -+ } -+ if ($name == "gpu0_sm_speed") { -+ $tpl->newBlock('gpu_sm_speed'); -+ } -+ if ($name == "gpu0_mem_speed") { -+ $tpl->newBlock('gpu_mem_speed'); -+ } - if ($v['TYPE'] == "string" or $v['TYPE']=="timestamp" or - (isset($always_timestamp[$name]) and $always_timestamp[$name])) - { -@@ -144,7 +171,7 @@ - if ( is_array($g_metrics) && is_array($g_metrics_group) ) - { - ksort($g_metrics_group); -- -+ $host_metrics = 0; - foreach ( $g_metrics_group as $group => $metric_array ) - { - if ( $group == "" ) { -@@ -153,6 +180,7 @@ - $tpl->newBlock("vol_group_info"); - $tpl->assign("group", $group); - $c = count($metric_array); -+ $host_metrics += $c; - $tpl->assign("group_metric_count", $c); - $i = 0; - ksort($g_metrics); -@@ -169,7 +197,8 @@ - } - } - } -- -+ $tpl->gotoBlock("_ROOT"); -+ $tpl->assign("host_metric_count", $host_metrics); - } - - $tpl->printToScreen(); -Index: templates/default/host_view.tpl -=================================================================== ---- templates/default/host_view.tpl (revision 2612) -+++ templates/default/host_view.tpl (working copy) -@@ -80,6 +80,55 @@ - {cluster_url} PACKETS - -+ -+{cluster_url} GPU0_MEM -+ -+ -+ -+{cluster_url} GPU1_MEM -+ -+ -+ -+{cluster_url} GPU2_MEM -+ -+ -+ -+{cluster_url} GPU3_MEM -+ -+ -+ -+{cluster_url} GPU_CORE_SPEED -+ -+ -+ -+{cluster_url} GPU_SM_SPEED -+ -+ -+ -+{cluster_url} 
GPU_MEM_SPEED -+ -+ -+ -+{cluster_url} GPU_UTIL -+ -+ -+ -+{cluster_url} GPU_MEM_UTIL -+ -+ -+ -+{cluster_url} GPU_TEMP -+ - - - -@@ -89,7 +138,7 @@ - - -
-- {host} graphs -+ {host} graphs ({host_metric_count}) - last {range} - sorted {sort} - diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/nvidia_smi.py b/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/nvidia_smi.py deleted file mode 100644 index f1a42707..00000000 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/nvidia_smi.py +++ /dev/null @@ -1,455 +0,0 @@ -##### -# Copyright (c) 2011-2012, NVIDIA Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the NVIDIA Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-##### - -# -# nvidia_smi -# nvml_bindings nvidia com -# -# Sample code that attempts to reproduce the output of nvidia-smi -q- x -# For many cases the output should match -# -# To Run: -# $ python -# Python 2.7 (r27:82500, Sep 16 2010, 18:02:00) -# [GCC 4.5.1 20100907 (Red Hat 4.5.1-3)] on linux2 -# Type "help", "copyright", "credits" or "license" for more information. -# >>> import nvidia_smi -# >>> print(nvidia_smi.XmlDeviceQuery()) -# ... -# - -from pynvml import * -import datetime - -# -# Helper functions -# -def GetEccByType(handle, counterType, bitType): - try: - count = str(nvmlDeviceGetTotalEccErrors(handle, bitType, counterType)) - except NVMLError as err: - count = handleError(err) - - try: - detail = nvmlDeviceGetDetailedEccErrors(handle, bitType, counterType) - deviceMemory = str(detail.deviceMemory) - registerFile = str(detail.registerFile) - l1Cache = str(detail.l1Cache) - l2Cache = str(detail.l2Cache) - except NVMLError as err: - msg = handleError(err) - deviceMemory = msg - registerFile = msg - l1Cache = msg - l2Cache = msg - strResult = '' - strResult += ' ' + deviceMemory + '\n' - strResult += ' ' + registerFile + '\n' - strResult += ' ' + l1Cache + '\n' - strResult += ' ' + l2Cache + '\n' - strResult += ' ' + count + '\n' - return strResult - -def GetEccByCounter(handle, counterType): - strResult = '' - strResult += ' \n' - strResult += str(GetEccByType(handle, counterType, NVML_SINGLE_BIT_ECC)) - strResult += ' \n' - strResult += ' \n' - strResult += str(GetEccByType(handle, counterType, NVML_DOUBLE_BIT_ECC)) - strResult += ' \n' - return strResult - -def GetEccStr(handle): - strResult = '' - strResult += ' \n' - strResult += str(GetEccByCounter(handle, NVML_VOLATILE_ECC)) - strResult += ' \n' - strResult += ' \n' - strResult += str(GetEccByCounter(handle, NVML_AGGREGATE_ECC)) - strResult += ' \n' - return strResult - -# -# Converts errors into string messages -# -def handleError(err): - if (err.value == NVML_ERROR_NOT_SUPPORTED): - return 
"N/A" - else: - return err.__str__() - -####### -def XmlDeviceQuery(): - - try: - # - # Initialize NVML - # - nvmlInit() - strResult = '' - - strResult += '\n' - strResult += '\n' - strResult += '\n' - - strResult += ' ' + str(datetime.date.today()) + '\n' - strResult += ' ' + str(nvmlSystemGetDriverVersion()) + '\n' - - deviceCount = nvmlDeviceGetCount() - strResult += ' ' + str(deviceCount) + '\n' - - for i in range(0, deviceCount): - handle = nvmlDeviceGetHandleByIndex(i) - - pciInfo = nvmlDeviceGetPciInfo(handle) - - strResult += ' \n' % pciInfo.busId - - strResult += ' ' + nvmlDeviceGetName(handle) + '\n' - - try: - state = ('Enabled' if (nvmlDeviceGetDisplayMode(handle) != 0) else 'Disabled') - except NVMLError as err: - state = handleError(err) - - strResult += ' ' + state + '\n' - - try: - mode = 'Enabled' if (nvmlDeviceGetPersistenceMode(handle) != 0) else 'Disabled' - except NVMLError as err: - mode = handleError(err) - - strResult += ' ' + mode + '\n' - - strResult += ' \n' - - try: - current = str(nvmlDeviceGetCurrentDriverModel(handle)) - except NVMLError as err: - current = handleError(err) - strResult += ' ' + current + '\n' - - try: - pending = str(nvmlDeviceGetPendingDriverModel(handle)) - except NVMLError as err: - pending = handleError(err) - - strResult += ' ' + pending + '\n' - - strResult += ' \n' - - try: - serial = nvmlDeviceGetSerial(handle) - except NVMLError as err: - serial = handleError(err) - - strResult += ' ' + serial + '\n' - - try: - uuid = nvmlDeviceGetUUID(handle) - except NVMLError as err: - uuid = handleError(err) - - strResult += ' ' + uuid + '\n' - - try: - vbios = nvmlDeviceGetVbiosVersion(handle) - except NVMLError as err: - vbios = handleError(err) - - strResult += ' ' + vbios + '\n' - - strResult += ' \n' - - try: - oem = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_OEM) - if oem == '': - oem = 'N/A' - except NVMLError as err: - oem = handleError(err) - - strResult += ' ' + oem + '\n' - - try: - ecc = 
nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_ECC) - if ecc == '': - ecc = 'N/A' - except NVMLError as err: - ecc = handleError(err) - - strResult += ' ' + ecc + '\n' - try: - pwr = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_POWER) - if pwr == '': - pwr = 'N/A' - except NVMLError as err: - pwr = handleError(err) - - strResult += ' ' + pwr + '\n' - strResult += ' \n' - - strResult += ' \n' - strResult += ' %02X\n' % pciInfo.bus - strResult += ' %02X\n' % pciInfo.device - strResult += ' %04X\n' % pciInfo.domain - strResult += ' %08X\n' % (pciInfo.pciDeviceId) - strResult += ' %08X\n' % (pciInfo.pciSubSystemId) - strResult += ' ' + str(pciInfo.busId) + '\n' - strResult += ' \n' - - - strResult += ' \n' - - try: - gen = str(nvmlDeviceGetMaxPcieLinkGeneration(handle)) - except NVMLError as err: - gen = handleError(err) - - strResult += ' ' + gen + '\n' - - try: - gen = str(nvmlDeviceGetCurrPcieLinkGeneration(handle)) - except NVMLError as err: - gen = handleError(err) - - strResult += ' ' + gen + '\n' - strResult += ' \n' - strResult += ' \n' - - try: - width = str(nvmlDeviceGetMaxPcieLinkWidth(handle)) + 'x' - except NVMLError as err: - width = handleError(err) - - strResult += ' ' + width + '\n' - - try: - width = str(nvmlDeviceGetCurrPcieLinkWidth(handle)) + 'x' - except NVMLError as err: - width = handleError(err) - - strResult += ' ' + width + '\n' - - strResult += ' \n' - strResult += ' \n' - strResult += ' \n' - - try: - fan = str(nvmlDeviceGetFanSpeed(handle)) + ' %' - except NVMLError as err: - fan = handleError(err) - strResult += ' ' + fan + '\n' - - try: - memInfo = nvmlDeviceGetMemoryInfo(handle) - mem_total = str(memInfo.total / 1024 / 1024) + ' MB' - mem_used = str(memInfo.used / 1024 / 1024) + ' MB' - mem_free = str(memInfo.free / 1024 / 1024) + ' MB' - except NVMLError as err: - error = handleError(err) - mem_total = error - mem_used = error - mem_free = error - - strResult += ' \n' - strResult += ' ' + mem_total + '\n' - strResult += ' ' + 
mem_used + '\n' - strResult += ' ' + mem_free + '\n' - strResult += ' \n' - - - try: - mode = nvmlDeviceGetComputeMode(handle) - if mode == NVML_COMPUTEMODE_DEFAULT: - modeStr = 'Default' - elif mode == NVML_COMPUTEMODE_EXCLUSIVE_THREAD: - modeStr = 'Exclusive Thread' - elif mode == NVML_COMPUTEMODE_PROHIBITED: - modeStr = 'Prohibited' - elif mode == NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: - modeStr = 'Exclusive Process' - else: - modeStr = 'Unknown' - except NVMLError as err: - modeStr = handleError(err) - - strResult += ' ' + modeStr + '\n' - - try: - util = nvmlDeviceGetUtilizationRates(handle) - gpu_util = str(util.gpu) - mem_util = str(util.memory) - except NVMLError as err: - error = handleError(err) - gpu_util = error - mem_util = error - - strResult += ' \n' - strResult += ' ' + gpu_util + ' %\n' - strResult += ' ' + mem_util + ' %\n' - strResult += ' \n' - - try: - (current, pending) = nvmlDeviceGetEccMode(handle) - curr_str = 'Enabled' if (current != 0) else 'Disabled' - pend_str = 'Enabled' if (pending != 0) else 'Disabled' - except NVMLError as err: - error = handleError(err) - curr_str = error - pend_str = error - - strResult += ' \n' - strResult += ' ' + curr_str + '\n' - strResult += ' ' + pend_str + '\n' - strResult += ' \n' - - strResult += ' \n' - strResult += GetEccStr(handle) - strResult += ' \n' - - try: - temp = str(nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)) + ' C' - except NVMLError as err: - temp = handleError(err) - - strResult += ' \n' - strResult += ' ' + temp + '\n' - strResult += ' \n' - - strResult += ' \n' - try: - perfState = nvmlDeviceGetPowerState(handle) - except NVMLError as err: - perfState = handleError(err) - strResult += ' P%s\n' % perfState - try: - powMan = nvmlDeviceGetPowerManagementMode(handle) - powManStr = 'Supported' if powMan != 0 else 'N/A' - except NVMLError as err: - powManStr = handleError(err) - strResult += ' ' + powManStr + '\n' - try: - powDraw = (nvmlDeviceGetPowerUsage(handle) / 1000.0) - 
powDrawStr = '%.2f W' % powDraw - except NVMLError as err: - powDrawStr = handleError(err) - strResult += ' ' + powDrawStr + '\n' - try: - powLimit = (nvmlDeviceGetPowerManagementLimit(handle) / 1000.0) - powLimitStr = '%d W' % powLimit - except NVMLError as err: - powLimitStr = handleError(err) - strResult += ' ' + powLimitStr + '\n' - strResult += ' \n' - - strResult += ' \n' - try: - graphics = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_GRAPHICS)) - except NVMLError as err: - graphics = handleError(err) - strResult += ' ' +graphics + ' MHz\n' - try: - sm = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_SM)) - except NVMLError as err: - sm = handleError(err) - strResult += ' ' + sm + ' MHz\n' - try: - mem = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_MEM)) - except NVMLError as err: - mem = handleError(err) - strResult += ' ' + mem + ' MHz\n' - strResult += ' \n' - - strResult += ' \n' - try: - graphics = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_GRAPHICS)) - except NVMLError as err: - graphics = handleError(err) - strResult += ' ' + graphics + ' MHz\n' - try: - sm = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_SM)) - except NVMLError as err: - sm = handleError(err) - strResult += ' ' + sm + ' MHz\n' - try: - mem = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_MEM)) - except NVMLError as err: - mem = handleError(err) - strResult += ' ' + mem + ' MHz\n' - strResult += ' \n' - - try: - perfState = nvmlDeviceGetPowerState(handle) - perfStateStr = 'P%s' % perfState - except NVMLError as err: - perfStateStr = handleError(err) - strResult += ' ' + perfStateStr + '\n' - - strResult += ' \n' - - procstr = "" - try: - procs = nvmlDeviceGetComputeRunningProcesses(handle) - except NVMLError as err: - procs = [] - procstr = handleError(err) - - for p in procs: - procstr += ' \n' - procstr += ' %d\n' % p.pid - try: - name = str(nvmlSystemGetProcessName(p.pid)) - except NVMLError as err: - if (err.value == NVML_ERROR_NOT_FOUND): - # probably went away - 
continue - else: - name = handleError(err) - procstr += ' ' + name + '\n' - procstr += ' \n' - if (p.usedGpuMemory == None): - procstr += 'N\A' - else: - procstr += '%d MB\n' % (p.usedGpuMemory / 1024 / 1024) - procstr += '\n' - procstr += ' \n' - - strResult += procstr - strResult += ' \n' - strResult += ' \n' - - strResult += '\n' - - except NVMLError as err: - strResult += 'nvidia_smi.py: ' + err.__str__() + '\n' - - nvmlShutdown() - - return strResult - diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/pynvml.py b/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/pynvml.py deleted file mode 100644 index 90f8bdd8..00000000 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/build/lib/pynvml.py +++ /dev/null @@ -1,903 +0,0 @@ -##### -# Copyright (c) 2011-2012, NVIDIA Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the NVIDIA Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -##### - -## -# Python bindings for the NVML library -## -from ctypes import * -from ctypes.util import find_library -import sys -import threading - -## C Type mappings ## -## Enums -_nvmlEnableState_t = c_uint -NVML_FEATURE_DISABLED = 0 -NVML_FEATURE_ENABLED = 1 - -_nvmlTemperatureSensors_t = c_uint -NVML_TEMPERATURE_GPU = 0 - -_nvmlComputeMode_t = c_uint -NVML_COMPUTEMODE_DEFAULT = 0 -NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 -NVML_COMPUTEMODE_PROHIBITED = 2 -NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 - -_nvmlEccBitType_t = c_uint -NVML_SINGLE_BIT_ECC = 0 -NVML_DOUBLE_BIT_ECC = 1 - -_nvmlEccCounterType_t = c_uint -NVML_VOLATILE_ECC = 0 -NVML_AGGREGATE_ECC = 1 - -_nvmlClockType_t = c_uint -NVML_CLOCK_GRAPHICS = 0 -NVML_CLOCK_SM = 1 -NVML_CLOCK_MEM = 2 - -_nvmlDriverModel_t = c_uint -NVML_DRIVER_WDDM = 0 -NVML_DRIVER_WDM = 1 - -_nvmlPstates_t = c_uint -NVML_PSTATE_0 = 0 -NVML_PSTATE_1 = 1 -NVML_PSTATE_2 = 2 -NVML_PSTATE_3 = 3 -NVML_PSTATE_4 = 4 -NVML_PSTATE_5 = 5 -NVML_PSTATE_6 = 6 -NVML_PSTATE_7 = 7 -NVML_PSTATE_8 = 8 -NVML_PSTATE_9 = 9 -NVML_PSTATE_10 = 10 -NVML_PSTATE_11 = 11 -NVML_PSTATE_12 = 12 -NVML_PSTATE_13 = 13 -NVML_PSTATE_14 = 14 -NVML_PSTATE_15 = 15 -NVML_PSTATE_UNKNOWN = 32 - -_nvmlInforomObject_t = c_uint -NVML_INFOROM_OEM = 0 -NVML_INFOROM_ECC = 1 -NVML_INFOROM_POWER = 2 - -_nvmlReturn_t = c_uint -NVML_SUCCESS = 0 -NVML_ERROR_UNINITIALIZED = 1 -NVML_ERROR_INVALID_ARGUMENT = 2 -NVML_ERROR_NOT_SUPPORTED = 3 -NVML_ERROR_NO_PERMISSION = 
4 -NVML_ERROR_ALREADY_INITIALIZED = 5 -NVML_ERROR_NOT_FOUND = 6 -NVML_ERROR_INSUFFICIENT_SIZE = 7 -NVML_ERROR_INSUFFICIENT_POWER = 8 -NVML_ERROR_DRIVER_NOT_LOADED = 9 -NVML_ERROR_TIMEOUT = 10, -NVML_ERROR_UNKNOWN = 999 - -_nvmlFanState_t = c_uint -NVML_FAN_NORMAL = 0 -NVML_FAN_FAILED = 1 - -_nvmlLedColor_t = c_uint -NVML_LED_COLOR_GREEN = 0 -NVML_LED_COLOR_AMBER = 1 - -# C preprocessor defined values -nvmlFlagDefault = 0 -nvmlFlagForce = 1 - -# buffer size -NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 -NVML_DEVICE_UUID_BUFFER_SIZE = 80 -NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 -NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 -NVML_DEVICE_NAME_BUFFER_SIZE = 64 -NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 -NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 - -NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) - -## Lib loading ## -nvmlLib = None -libLoadLock = threading.Lock() - -## Error Checking ## -class NVMLError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return str(nvmlErrorString(self.value)) - -def _nvmlCheckReturn(ret): - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - return ret - -## Function access ## -def _nvmlGetFunctionPointer(name): - global nvmlLib - global libLoadLock - - libLoadLock.acquire() - try: - # ensure library was loaded - if (nvmlLib == None): - raise NVMLError(NVML_ERROR_UNINITIALIZED) - try: - return getattr(nvmlLib, name) - except AttributeError as attrError: - raise NVMLError(NVML_ERROR_NOT_SUPPORTED) - finally: - # lock is always freed - libLoadLock.release() - -## Alternative object -# Allows the object to be printed -# Allows mismatched types to be assigned -# - like None when the Structure variant requires c_uint -class nvmlFriendlyObject(object): - def __init__(self, dictionary): - for x in dictionary: - setattr(self, x, dictionary[x]) - def __str__(self): - return self.__dict__.__str__() - -def nvmlStructToFriendlyObject(struct): - d = {} - for x in struct._fields_: - key = x[0] - value = 
getattr(struct, key) - d[key] = value - obj = nvmlFriendlyObject(d) - return obj - -# pack the object so it can be passed to the NVML library -def nvmlFriendlyObjectToStruct(obj, model): - for x in model._fields_: - key = x[0] - value = obj.__dict__[key] - setattr(model, key, value) - return model - -## Unit structures -class struct_c_nvmlUnit_t(Structure): - pass # opaque handle -c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) - -class c_nvmlUnitInfo_t(Structure): - _fields_ = [ - ('name', c_char * 96), - ('id', c_char * 96), - ('serial', c_char * 96), - ('firmwareVersion', c_char * 96), - ] - -class c_nvmlLedState_t(Structure): - _fields_ = [ - ('cause', c_char * 256), - ('color', _nvmlLedColor_t), - ] - -class c_nvmlPSUInfo_t(Structure): - _fields_ = [ - ('state', c_char * 256), - ('current', c_uint), - ('voltage', c_uint), - ('power', c_uint), - ] - -class c_nvmlUnitFanInfo_t(Structure): - _fields_ = [ - ('speed', c_uint), - ('state', _nvmlFanState_t), - ] - -class c_nvmlUnitFanSpeeds_t(Structure): - _fields_ = [ - ('fans', c_nvmlUnitFanInfo_t * 24), - ('count', c_uint) - ] - -## Device structures -class struct_c_nvmlDevice_t(Structure): - pass # opaque handle -c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) - -class nvmlPciInfo_t(Structure): - _fields_ = [ - ('busId', c_char * 16), - ('domain', c_uint), - ('bus', c_uint), - ('device', c_uint), - ('pciDeviceId', c_uint), - - # Added in 2.285 - ('pciSubSystemId', c_uint), - ('reserved0', c_uint), - ('reserved1', c_uint), - ('reserved2', c_uint), - ('reserved3', c_uint), - ] - -class c_nvmlMemory_t(Structure): - _fields_ = [ - ('total', c_ulonglong), - ('free', c_ulonglong), - ('used', c_ulonglong), - ] - -# On Windows with the WDDM driver, usedGpuMemory is reported as None -# Code that processes this structure should check for None, I.E. 
-# -# if (info.usedGpuMemory == None): -# # TODO handle the error -# pass -# else: -# print("Using %d MB of memory" % (info.usedGpuMemory / 1024 / 1024)) -# -# See NVML documentation for more information -class c_nvmlProcessInfo_t(Structure): - _fields_ = [ - ('pid', c_uint), - ('usedGpuMemory', c_ulonglong), - ] - -class c_nvmlEccErrorCounts_t(Structure): - _fields_ = [ - ('l1Cache', c_ulonglong), - ('l2Cache', c_ulonglong), - ('deviceMemory', c_ulonglong), - ('registerFile', c_ulonglong), - ] - -class c_nvmlUtilization_t(Structure): - _fields_ = [ - ('gpu', c_uint), - ('memory', c_uint), - ] - -# Added in 2.285 -class c_nvmlHwbcEntry_t(Structure): - _fields_ = [ - ('hwbcId', c_uint), - ('firmwareVersion', c_char * 32), - ] - -## Event structures -class struct_c_nvmlEventSet_t(Structure): - pass # opaque handle -c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) - -nvmlEventTypeSingleBitEccError = 0x0000000000000001 -nvmlEventTypeDoubleBitEccError = 0x0000000000000002 -nvmlEventTypePState = 0x0000000000000004 -nvmlEventTypeXidCriticalError = 0x0000000000000008 -nvmlEventTypeNone = 0x0000000000000000 -nvmlEventTypeAll = ( - nvmlEventTypeNone | - nvmlEventTypeSingleBitEccError | - nvmlEventTypeDoubleBitEccError | - nvmlEventTypePState | - nvmlEventTypeXidCriticalError - ) - -class c_nvmlEventData_t(Structure): - _fields_ = [ - ('device', c_nvmlDevice_t), - ('eventType', c_ulonglong), - ('reserved', c_ulonglong) - ] - -## C function wrappers ## -def nvmlInit(): - global nvmlLib - global libLoadLock - - # - # Load the library if it isn't loaded already - # - if (nvmlLib == None): - # lock to ensure only one caller loads the library - libLoadLock.acquire() - - try: - # ensure the library still isn't loaded - if (nvmlLib == None): - try: - if (sys.platform[:3] == "win"): - # cdecl calling convention - nvmlLib = cdll.nvml - else: - # assume linux - nvmlLib = CDLL("libnvidia-ml.so") - except OSError as ose: - print(ose) - _nvmlCheckReturn(NVML_ERROR_DRIVER_NOT_LOADED) - 
if (nvmlLib == None): - print("Failed to load NVML") - _nvmlCheckReturn(NVML_ERROR_DRIVER_NOT_LOADED) - finally: - # lock is always freed - libLoadLock.release() - - # - # Initialize the library - # - fn = _nvmlGetFunctionPointer("nvmlInit") - ret = fn() - _nvmlCheckReturn(ret) - return None - -def nvmlShutdown(): - # - # Leave the library loaded, but shutdown the interface - # - fn = _nvmlGetFunctionPointer("nvmlShutdown") - ret = fn() - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlErrorString(result): - fn = _nvmlGetFunctionPointer("nvmlErrorString") - fn.restype = c_char_p # otherwise return is an int - ret = fn(result) - return ret - -# Added in 2.285 -def nvmlSystemGetNVMLVersion(): - c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetProcessName(pid): - c_name = create_string_buffer(1024) - fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") - ret = fn(c_uint(pid), c_name, c_uint(1024)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlSystemGetDriverVersion(): - c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetHicVersion(): - c_count = c_uint(0) - hics = None - fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") - - # get the count - ret = fn(byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # if there are no hics - if (c_count.value == 0): - return [] - - hic_array = c_nvmlHwbcEntry_t * c_count.value - hics = hic_array() - ret = 
fn(byref(c_count), hics) - _nvmlCheckReturn(ret) - return hics - -## Unit get functions -def nvmlUnitGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetHandleByIndex(index): - c_index = c_uint(index) - unit = c_nvmlUnit_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") - ret = fn(c_index, byref(unit)) - _nvmlCheckReturn(ret) - return unit - -def nvmlUnitGetUnitInfo(unit): - c_info = c_nvmlUnitInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetLedState(unit): - c_state = c_nvmlLedState_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") - ret = fn(unit, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state - -def nvmlUnitGetPsuInfo(unit): - c_info = c_nvmlPSUInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetTemperature(unit, type): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") - ret = fn(unit, c_uint(type), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlUnitGetFanSpeedInfo(unit): - c_speeds = c_nvmlUnitFanSpeeds_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") - ret = fn(unit, byref(c_speeds)) - _nvmlCheckReturn(ret) - return c_speeds - -# added to API -def nvmlUnitGetDeviceCount(unit): - c_count = c_uint(0) - # query the unit to determine device count - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), None) - if (ret == NVML_ERROR_INSUFFICIENT_SIZE): - ret = NVML_ERROR_SUCCESS - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetDevices(unit): - c_count = c_uint(nvmlUnitGetDeviceCount(unit)) - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - fn = 
_nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return c_devices - -## Device get functions -def nvmlDeviceGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetHandleByIndex(index): - c_index = c_uint(index) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex") - ret = fn(c_index, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleBySerial(serial): - c_serial = c_char_p(serial) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") - ret = fn(c_serial, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByUUID(uuid): - c_uuid = c_char_p(uuid) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") - ret = fn(c_uuid, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByPciBusId(pciBusId): - c_busId = c_char_p(pciBusId) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId") - ret = fn(c_busId, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetName(handle): - c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") - ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlDeviceGetSerial(handle): - c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") - ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_serial.value - -def nvmlDeviceGetUUID(handle): - c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") - ret = fn(handle, c_uuid, 
c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_uuid.value - -def nvmlDeviceGetInforomVersion(handle, infoRomObject): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") - ret = fn(handle, _nvmlInforomObject_t(infoRomObject), - c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -def nvmlDeviceGetDisplayMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetPersistenceMode(handle): - c_state = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") - ret = fn(handle, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state.value - -def nvmlDeviceGetPciInfo(handle): - c_info = nvmlPciInfo_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") - ret = fn(handle, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlDeviceGetClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 2.285 -def nvmlDeviceGetMaxClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -def nvmlDeviceGetFanSpeed(handle): - c_speed = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") - ret = fn(handle, byref(c_speed)) - _nvmlCheckReturn(ret) - return c_speed.value - -def nvmlDeviceGetTemperature(handle, sensor): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") - ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) - _nvmlCheckReturn(ret) - return 
c_temp.value - -# DEPRECATED use nvmlDeviceGetPerformanceState -def nvmlDeviceGetPowerState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPerformanceState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPowerManagementMode(handle): - c_pcapMode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") - ret = fn(handle, byref(c_pcapMode)) - _nvmlCheckReturn(ret) - return c_pcapMode.value - -def nvmlDeviceGetPowerManagementLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -def nvmlDeviceGetPowerUsage(handle): - c_watts = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") - ret = fn(handle, byref(c_watts)) - _nvmlCheckReturn(ret) - return c_watts.value - -def nvmlDeviceGetMemoryInfo(handle): - c_memory = c_nvmlMemory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") - ret = fn(handle, byref(c_memory)) - _nvmlCheckReturn(ret) - return c_memory - -def nvmlDeviceGetComputeMode(handle): - c_mode = _nvmlComputeMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetEccMode(handle): - c_currState = _nvmlEnableState_t() - c_pendingState = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# added to API -def nvmlDeviceGetCurrentEccMode(handle): - return nvmlDeviceGetEccMode(handle)[0] - -# added to 
API -def nvmlDeviceGetPendingEccMode(handle): - return nvmlDeviceGetEccMode(handle)[1] - -def nvmlDeviceGetTotalEccErrors(handle, bitType, counterType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") - ret = fn(handle, _nvmlEccBitType_t(bitType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetDetailedEccErrors(handle, bitType, counterType): - c_count = c_nvmlEccErrorCounts_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") - ret = fn(handle, _nvmlEccBitType_t(bitType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count - -def nvmlDeviceGetUtilizationRates(handle): - c_util = c_nvmlUtilization_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") - ret = fn(handle, byref(c_util)) - _nvmlCheckReturn(ret) - return c_util - -def nvmlDeviceGetDriverModel(handle): - c_currModel = _nvmlDriverModel_t() - c_pendingModel = _nvmlDriverModel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") - ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) - _nvmlCheckReturn(ret) - return [c_currModel.value, c_pendingModel.value] - -# added to API -def nvmlDeviceGetCurrentDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[0] - -# added to API -def nvmlDeviceGetPendingDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[1] - -# Added in 2.285 -def nvmlDeviceGetVbiosVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlDeviceGetComputeRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") - ret = fn(handle, 
byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -## Set functions -def nvmlUnitSetLedState(unit, color): - fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") - ret = fn(unit, _nvmlLedColor_t(color)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetPersistenceMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetComputeMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") - ret = fn(handle, _nvmlComputeMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetEccMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearEccErrorCounts(handle, counterType): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") - ret = fn(handle, _nvmlEccCounterType_t(counterType)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetDriverModel(handle, model): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") - ret = fn(handle, _nvmlDriverModel_t(model)) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 
-def nvmlEventSetCreate(): - fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") - eventSet = c_nvmlEventSet_t() - ret = fn(byref(eventSet)) - _nvmlCheckReturn(ret) - return eventSet - -# Added in 2.285 -def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): - fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") - ret = fn(handle, c_ulonglong(eventTypes), eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlDeviceGetSupportedEventTypes(handle): - c_eventTypes = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") - ret = fn(handle, byref(c_eventTypes)) - _nvmlCheckReturn(ret) - return c_eventTypes.value - -# Added in 2.285 -# raises NVML_ERROR_TIMEOUT exception on timeout -def nvmlEventSetWait(eventSet, timeoutms): - fn = _nvmlGetFunctionPointer("nvmlEventSetWait") - data = c_nvmlEventData_t() - ret = fn(eventSet, byref(data), c_uint(timeoutms)) - _nvmlCheckReturn(ret) - return data - -# Added in 2.285 -def nvmlEventSetFree(eventSet): - fn = _nvmlGetFunctionPointer("nvmlEventSetFree") - ret = fn(eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlEventDataGetPerformanceState(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetPerformanceState") - pstate = _nvmlPstates_t() - ret = fn(byref(data), byref(pstate)) - _nvmlCheckReturn(ret) - return pstate.value - -# Added in 2.285 -def nvmlEventDataGetXidCriticalError(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetXidCriticalError") - xid = c_uint() - ret = fn(byref(data), byref(xid)) - _nvmlCheckReturn(ret) - return xid.value - -# Added in 2.285 -def nvmlEventDataGetEccErrorCount(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetEccErrorCount") - ecc = c_ulonglong() - ret = fn(byref(data), byref(ecc)) - _nvmlCheckReturn(ret) - return ecc.value - -# Added in 3.295 -def nvmlDeviceOnSameBoard(handle1, handle2): - fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") - onSameBoard = c_int() - ret = fn(handle1, handle2, 
byref(onSameBoard)) - _nvmlCheckReturn(ret) - return (onSameBoard.value != 0) - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - - - diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/nvidia_smi.py b/gpu/nvidia/nvidia-ml-py-3.295.00/nvidia_smi.py deleted file mode 100644 index f1a42707..00000000 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/nvidia_smi.py +++ /dev/null @@ -1,455 +0,0 @@ -##### -# Copyright (c) 2011-2012, NVIDIA Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of the NVIDIA Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -##### - -# -# nvidia_smi -# nvml_bindings nvidia com -# -# Sample code that attempts to reproduce the output of nvidia-smi -q- x -# For many cases the output should match -# -# To Run: -# $ python -# Python 2.7 (r27:82500, Sep 16 2010, 18:02:00) -# [GCC 4.5.1 20100907 (Red Hat 4.5.1-3)] on linux2 -# Type "help", "copyright", "credits" or "license" for more information. -# >>> import nvidia_smi -# >>> print(nvidia_smi.XmlDeviceQuery()) -# ... 
-# - -from pynvml import * -import datetime - -# -# Helper functions -# -def GetEccByType(handle, counterType, bitType): - try: - count = str(nvmlDeviceGetTotalEccErrors(handle, bitType, counterType)) - except NVMLError as err: - count = handleError(err) - - try: - detail = nvmlDeviceGetDetailedEccErrors(handle, bitType, counterType) - deviceMemory = str(detail.deviceMemory) - registerFile = str(detail.registerFile) - l1Cache = str(detail.l1Cache) - l2Cache = str(detail.l2Cache) - except NVMLError as err: - msg = handleError(err) - deviceMemory = msg - registerFile = msg - l1Cache = msg - l2Cache = msg - strResult = '' - strResult += ' ' + deviceMemory + '\n' - strResult += ' ' + registerFile + '\n' - strResult += ' ' + l1Cache + '\n' - strResult += ' ' + l2Cache + '\n' - strResult += ' ' + count + '\n' - return strResult - -def GetEccByCounter(handle, counterType): - strResult = '' - strResult += ' \n' - strResult += str(GetEccByType(handle, counterType, NVML_SINGLE_BIT_ECC)) - strResult += ' \n' - strResult += ' \n' - strResult += str(GetEccByType(handle, counterType, NVML_DOUBLE_BIT_ECC)) - strResult += ' \n' - return strResult - -def GetEccStr(handle): - strResult = '' - strResult += ' \n' - strResult += str(GetEccByCounter(handle, NVML_VOLATILE_ECC)) - strResult += ' \n' - strResult += ' \n' - strResult += str(GetEccByCounter(handle, NVML_AGGREGATE_ECC)) - strResult += ' \n' - return strResult - -# -# Converts errors into string messages -# -def handleError(err): - if (err.value == NVML_ERROR_NOT_SUPPORTED): - return "N/A" - else: - return err.__str__() - -####### -def XmlDeviceQuery(): - - try: - # - # Initialize NVML - # - nvmlInit() - strResult = '' - - strResult += '\n' - strResult += '\n' - strResult += '\n' - - strResult += ' ' + str(datetime.date.today()) + '\n' - strResult += ' ' + str(nvmlSystemGetDriverVersion()) + '\n' - - deviceCount = nvmlDeviceGetCount() - strResult += ' ' + str(deviceCount) + '\n' - - for i in range(0, deviceCount): - handle = 
nvmlDeviceGetHandleByIndex(i) - - pciInfo = nvmlDeviceGetPciInfo(handle) - - strResult += ' \n' % pciInfo.busId - - strResult += ' ' + nvmlDeviceGetName(handle) + '\n' - - try: - state = ('Enabled' if (nvmlDeviceGetDisplayMode(handle) != 0) else 'Disabled') - except NVMLError as err: - state = handleError(err) - - strResult += ' ' + state + '\n' - - try: - mode = 'Enabled' if (nvmlDeviceGetPersistenceMode(handle) != 0) else 'Disabled' - except NVMLError as err: - mode = handleError(err) - - strResult += ' ' + mode + '\n' - - strResult += ' \n' - - try: - current = str(nvmlDeviceGetCurrentDriverModel(handle)) - except NVMLError as err: - current = handleError(err) - strResult += ' ' + current + '\n' - - try: - pending = str(nvmlDeviceGetPendingDriverModel(handle)) - except NVMLError as err: - pending = handleError(err) - - strResult += ' ' + pending + '\n' - - strResult += ' \n' - - try: - serial = nvmlDeviceGetSerial(handle) - except NVMLError as err: - serial = handleError(err) - - strResult += ' ' + serial + '\n' - - try: - uuid = nvmlDeviceGetUUID(handle) - except NVMLError as err: - uuid = handleError(err) - - strResult += ' ' + uuid + '\n' - - try: - vbios = nvmlDeviceGetVbiosVersion(handle) - except NVMLError as err: - vbios = handleError(err) - - strResult += ' ' + vbios + '\n' - - strResult += ' \n' - - try: - oem = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_OEM) - if oem == '': - oem = 'N/A' - except NVMLError as err: - oem = handleError(err) - - strResult += ' ' + oem + '\n' - - try: - ecc = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_ECC) - if ecc == '': - ecc = 'N/A' - except NVMLError as err: - ecc = handleError(err) - - strResult += ' ' + ecc + '\n' - try: - pwr = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_POWER) - if pwr == '': - pwr = 'N/A' - except NVMLError as err: - pwr = handleError(err) - - strResult += ' ' + pwr + '\n' - strResult += ' \n' - - strResult += ' \n' - strResult += ' %02X\n' % pciInfo.bus - strResult += ' %02X\n' % 
pciInfo.device - strResult += ' %04X\n' % pciInfo.domain - strResult += ' %08X\n' % (pciInfo.pciDeviceId) - strResult += ' %08X\n' % (pciInfo.pciSubSystemId) - strResult += ' ' + str(pciInfo.busId) + '\n' - strResult += ' \n' - - - strResult += ' \n' - - try: - gen = str(nvmlDeviceGetMaxPcieLinkGeneration(handle)) - except NVMLError as err: - gen = handleError(err) - - strResult += ' ' + gen + '\n' - - try: - gen = str(nvmlDeviceGetCurrPcieLinkGeneration(handle)) - except NVMLError as err: - gen = handleError(err) - - strResult += ' ' + gen + '\n' - strResult += ' \n' - strResult += ' \n' - - try: - width = str(nvmlDeviceGetMaxPcieLinkWidth(handle)) + 'x' - except NVMLError as err: - width = handleError(err) - - strResult += ' ' + width + '\n' - - try: - width = str(nvmlDeviceGetCurrPcieLinkWidth(handle)) + 'x' - except NVMLError as err: - width = handleError(err) - - strResult += ' ' + width + '\n' - - strResult += ' \n' - strResult += ' \n' - strResult += ' \n' - - try: - fan = str(nvmlDeviceGetFanSpeed(handle)) + ' %' - except NVMLError as err: - fan = handleError(err) - strResult += ' ' + fan + '\n' - - try: - memInfo = nvmlDeviceGetMemoryInfo(handle) - mem_total = str(memInfo.total / 1024 / 1024) + ' MB' - mem_used = str(memInfo.used / 1024 / 1024) + ' MB' - mem_free = str(memInfo.free / 1024 / 1024) + ' MB' - except NVMLError as err: - error = handleError(err) - mem_total = error - mem_used = error - mem_free = error - - strResult += ' \n' - strResult += ' ' + mem_total + '\n' - strResult += ' ' + mem_used + '\n' - strResult += ' ' + mem_free + '\n' - strResult += ' \n' - - - try: - mode = nvmlDeviceGetComputeMode(handle) - if mode == NVML_COMPUTEMODE_DEFAULT: - modeStr = 'Default' - elif mode == NVML_COMPUTEMODE_EXCLUSIVE_THREAD: - modeStr = 'Exclusive Thread' - elif mode == NVML_COMPUTEMODE_PROHIBITED: - modeStr = 'Prohibited' - elif mode == NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: - modeStr = 'Exclusive Process' - else: - modeStr = 'Unknown' - except NVMLError 
as err: - modeStr = handleError(err) - - strResult += ' ' + modeStr + '\n' - - try: - util = nvmlDeviceGetUtilizationRates(handle) - gpu_util = str(util.gpu) - mem_util = str(util.memory) - except NVMLError as err: - error = handleError(err) - gpu_util = error - mem_util = error - - strResult += ' \n' - strResult += ' ' + gpu_util + ' %\n' - strResult += ' ' + mem_util + ' %\n' - strResult += ' \n' - - try: - (current, pending) = nvmlDeviceGetEccMode(handle) - curr_str = 'Enabled' if (current != 0) else 'Disabled' - pend_str = 'Enabled' if (pending != 0) else 'Disabled' - except NVMLError as err: - error = handleError(err) - curr_str = error - pend_str = error - - strResult += ' \n' - strResult += ' ' + curr_str + '\n' - strResult += ' ' + pend_str + '\n' - strResult += ' \n' - - strResult += ' \n' - strResult += GetEccStr(handle) - strResult += ' \n' - - try: - temp = str(nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)) + ' C' - except NVMLError as err: - temp = handleError(err) - - strResult += ' \n' - strResult += ' ' + temp + '\n' - strResult += ' \n' - - strResult += ' \n' - try: - perfState = nvmlDeviceGetPowerState(handle) - except NVMLError as err: - perfState = handleError(err) - strResult += ' P%s\n' % perfState - try: - powMan = nvmlDeviceGetPowerManagementMode(handle) - powManStr = 'Supported' if powMan != 0 else 'N/A' - except NVMLError as err: - powManStr = handleError(err) - strResult += ' ' + powManStr + '\n' - try: - powDraw = (nvmlDeviceGetPowerUsage(handle) / 1000.0) - powDrawStr = '%.2f W' % powDraw - except NVMLError as err: - powDrawStr = handleError(err) - strResult += ' ' + powDrawStr + '\n' - try: - powLimit = (nvmlDeviceGetPowerManagementLimit(handle) / 1000.0) - powLimitStr = '%d W' % powLimit - except NVMLError as err: - powLimitStr = handleError(err) - strResult += ' ' + powLimitStr + '\n' - strResult += ' \n' - - strResult += ' \n' - try: - graphics = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_GRAPHICS)) - except NVMLError as 
err: - graphics = handleError(err) - strResult += ' ' +graphics + ' MHz\n' - try: - sm = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_SM)) - except NVMLError as err: - sm = handleError(err) - strResult += ' ' + sm + ' MHz\n' - try: - mem = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_MEM)) - except NVMLError as err: - mem = handleError(err) - strResult += ' ' + mem + ' MHz\n' - strResult += ' \n' - - strResult += ' \n' - try: - graphics = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_GRAPHICS)) - except NVMLError as err: - graphics = handleError(err) - strResult += ' ' + graphics + ' MHz\n' - try: - sm = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_SM)) - except NVMLError as err: - sm = handleError(err) - strResult += ' ' + sm + ' MHz\n' - try: - mem = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_MEM)) - except NVMLError as err: - mem = handleError(err) - strResult += ' ' + mem + ' MHz\n' - strResult += ' \n' - - try: - perfState = nvmlDeviceGetPowerState(handle) - perfStateStr = 'P%s' % perfState - except NVMLError as err: - perfStateStr = handleError(err) - strResult += ' ' + perfStateStr + '\n' - - strResult += ' \n' - - procstr = "" - try: - procs = nvmlDeviceGetComputeRunningProcesses(handle) - except NVMLError as err: - procs = [] - procstr = handleError(err) - - for p in procs: - procstr += ' \n' - procstr += ' %d\n' % p.pid - try: - name = str(nvmlSystemGetProcessName(p.pid)) - except NVMLError as err: - if (err.value == NVML_ERROR_NOT_FOUND): - # probably went away - continue - else: - name = handleError(err) - procstr += ' ' + name + '\n' - procstr += ' \n' - if (p.usedGpuMemory == None): - procstr += 'N\A' - else: - procstr += '%d MB\n' % (p.usedGpuMemory / 1024 / 1024) - procstr += '\n' - procstr += ' \n' - - strResult += procstr - strResult += ' \n' - strResult += ' \n' - - strResult += '\n' - - except NVMLError as err: - strResult += 'nvidia_smi.py: ' + err.__str__() + '\n' - - nvmlShutdown() - - return strResult - diff --git 
a/gpu/nvidia/nvidia-ml-py-3.295.00/pynvml.py b/gpu/nvidia/nvidia-ml-py-3.295.00/pynvml.py deleted file mode 100644 index 90f8bdd8..00000000 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/pynvml.py +++ /dev/null @@ -1,903 +0,0 @@ -##### -# Copyright (c) 2011-2012, NVIDIA Corporation. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the NVIDIA Corporation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-##### - -## -# Python bindings for the NVML library -## -from ctypes import * -from ctypes.util import find_library -import sys -import threading - -## C Type mappings ## -## Enums -_nvmlEnableState_t = c_uint -NVML_FEATURE_DISABLED = 0 -NVML_FEATURE_ENABLED = 1 - -_nvmlTemperatureSensors_t = c_uint -NVML_TEMPERATURE_GPU = 0 - -_nvmlComputeMode_t = c_uint -NVML_COMPUTEMODE_DEFAULT = 0 -NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 -NVML_COMPUTEMODE_PROHIBITED = 2 -NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 - -_nvmlEccBitType_t = c_uint -NVML_SINGLE_BIT_ECC = 0 -NVML_DOUBLE_BIT_ECC = 1 - -_nvmlEccCounterType_t = c_uint -NVML_VOLATILE_ECC = 0 -NVML_AGGREGATE_ECC = 1 - -_nvmlClockType_t = c_uint -NVML_CLOCK_GRAPHICS = 0 -NVML_CLOCK_SM = 1 -NVML_CLOCK_MEM = 2 - -_nvmlDriverModel_t = c_uint -NVML_DRIVER_WDDM = 0 -NVML_DRIVER_WDM = 1 - -_nvmlPstates_t = c_uint -NVML_PSTATE_0 = 0 -NVML_PSTATE_1 = 1 -NVML_PSTATE_2 = 2 -NVML_PSTATE_3 = 3 -NVML_PSTATE_4 = 4 -NVML_PSTATE_5 = 5 -NVML_PSTATE_6 = 6 -NVML_PSTATE_7 = 7 -NVML_PSTATE_8 = 8 -NVML_PSTATE_9 = 9 -NVML_PSTATE_10 = 10 -NVML_PSTATE_11 = 11 -NVML_PSTATE_12 = 12 -NVML_PSTATE_13 = 13 -NVML_PSTATE_14 = 14 -NVML_PSTATE_15 = 15 -NVML_PSTATE_UNKNOWN = 32 - -_nvmlInforomObject_t = c_uint -NVML_INFOROM_OEM = 0 -NVML_INFOROM_ECC = 1 -NVML_INFOROM_POWER = 2 - -_nvmlReturn_t = c_uint -NVML_SUCCESS = 0 -NVML_ERROR_UNINITIALIZED = 1 -NVML_ERROR_INVALID_ARGUMENT = 2 -NVML_ERROR_NOT_SUPPORTED = 3 -NVML_ERROR_NO_PERMISSION = 4 -NVML_ERROR_ALREADY_INITIALIZED = 5 -NVML_ERROR_NOT_FOUND = 6 -NVML_ERROR_INSUFFICIENT_SIZE = 7 -NVML_ERROR_INSUFFICIENT_POWER = 8 -NVML_ERROR_DRIVER_NOT_LOADED = 9 -NVML_ERROR_TIMEOUT = 10, -NVML_ERROR_UNKNOWN = 999 - -_nvmlFanState_t = c_uint -NVML_FAN_NORMAL = 0 -NVML_FAN_FAILED = 1 - -_nvmlLedColor_t = c_uint -NVML_LED_COLOR_GREEN = 0 -NVML_LED_COLOR_AMBER = 1 - -# C preprocessor defined values -nvmlFlagDefault = 0 -nvmlFlagForce = 1 - -# buffer size -NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 
-NVML_DEVICE_UUID_BUFFER_SIZE = 80 -NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 -NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 -NVML_DEVICE_NAME_BUFFER_SIZE = 64 -NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 -NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 - -NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) - -## Lib loading ## -nvmlLib = None -libLoadLock = threading.Lock() - -## Error Checking ## -class NVMLError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return str(nvmlErrorString(self.value)) - -def _nvmlCheckReturn(ret): - if (ret != NVML_SUCCESS): - raise NVMLError(ret) - return ret - -## Function access ## -def _nvmlGetFunctionPointer(name): - global nvmlLib - global libLoadLock - - libLoadLock.acquire() - try: - # ensure library was loaded - if (nvmlLib == None): - raise NVMLError(NVML_ERROR_UNINITIALIZED) - try: - return getattr(nvmlLib, name) - except AttributeError as attrError: - raise NVMLError(NVML_ERROR_NOT_SUPPORTED) - finally: - # lock is always freed - libLoadLock.release() - -## Alternative object -# Allows the object to be printed -# Allows mismatched types to be assigned -# - like None when the Structure variant requires c_uint -class nvmlFriendlyObject(object): - def __init__(self, dictionary): - for x in dictionary: - setattr(self, x, dictionary[x]) - def __str__(self): - return self.__dict__.__str__() - -def nvmlStructToFriendlyObject(struct): - d = {} - for x in struct._fields_: - key = x[0] - value = getattr(struct, key) - d[key] = value - obj = nvmlFriendlyObject(d) - return obj - -# pack the object so it can be passed to the NVML library -def nvmlFriendlyObjectToStruct(obj, model): - for x in model._fields_: - key = x[0] - value = obj.__dict__[key] - setattr(model, key, value) - return model - -## Unit structures -class struct_c_nvmlUnit_t(Structure): - pass # opaque handle -c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) - -class c_nvmlUnitInfo_t(Structure): - _fields_ = [ - ('name', c_char * 96), - ('id', c_char 
* 96), - ('serial', c_char * 96), - ('firmwareVersion', c_char * 96), - ] - -class c_nvmlLedState_t(Structure): - _fields_ = [ - ('cause', c_char * 256), - ('color', _nvmlLedColor_t), - ] - -class c_nvmlPSUInfo_t(Structure): - _fields_ = [ - ('state', c_char * 256), - ('current', c_uint), - ('voltage', c_uint), - ('power', c_uint), - ] - -class c_nvmlUnitFanInfo_t(Structure): - _fields_ = [ - ('speed', c_uint), - ('state', _nvmlFanState_t), - ] - -class c_nvmlUnitFanSpeeds_t(Structure): - _fields_ = [ - ('fans', c_nvmlUnitFanInfo_t * 24), - ('count', c_uint) - ] - -## Device structures -class struct_c_nvmlDevice_t(Structure): - pass # opaque handle -c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) - -class nvmlPciInfo_t(Structure): - _fields_ = [ - ('busId', c_char * 16), - ('domain', c_uint), - ('bus', c_uint), - ('device', c_uint), - ('pciDeviceId', c_uint), - - # Added in 2.285 - ('pciSubSystemId', c_uint), - ('reserved0', c_uint), - ('reserved1', c_uint), - ('reserved2', c_uint), - ('reserved3', c_uint), - ] - -class c_nvmlMemory_t(Structure): - _fields_ = [ - ('total', c_ulonglong), - ('free', c_ulonglong), - ('used', c_ulonglong), - ] - -# On Windows with the WDDM driver, usedGpuMemory is reported as None -# Code that processes this structure should check for None, I.E. 
-# -# if (info.usedGpuMemory == None): -# # TODO handle the error -# pass -# else: -# print("Using %d MB of memory" % (info.usedGpuMemory / 1024 / 1024)) -# -# See NVML documentation for more information -class c_nvmlProcessInfo_t(Structure): - _fields_ = [ - ('pid', c_uint), - ('usedGpuMemory', c_ulonglong), - ] - -class c_nvmlEccErrorCounts_t(Structure): - _fields_ = [ - ('l1Cache', c_ulonglong), - ('l2Cache', c_ulonglong), - ('deviceMemory', c_ulonglong), - ('registerFile', c_ulonglong), - ] - -class c_nvmlUtilization_t(Structure): - _fields_ = [ - ('gpu', c_uint), - ('memory', c_uint), - ] - -# Added in 2.285 -class c_nvmlHwbcEntry_t(Structure): - _fields_ = [ - ('hwbcId', c_uint), - ('firmwareVersion', c_char * 32), - ] - -## Event structures -class struct_c_nvmlEventSet_t(Structure): - pass # opaque handle -c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) - -nvmlEventTypeSingleBitEccError = 0x0000000000000001 -nvmlEventTypeDoubleBitEccError = 0x0000000000000002 -nvmlEventTypePState = 0x0000000000000004 -nvmlEventTypeXidCriticalError = 0x0000000000000008 -nvmlEventTypeNone = 0x0000000000000000 -nvmlEventTypeAll = ( - nvmlEventTypeNone | - nvmlEventTypeSingleBitEccError | - nvmlEventTypeDoubleBitEccError | - nvmlEventTypePState | - nvmlEventTypeXidCriticalError - ) - -class c_nvmlEventData_t(Structure): - _fields_ = [ - ('device', c_nvmlDevice_t), - ('eventType', c_ulonglong), - ('reserved', c_ulonglong) - ] - -## C function wrappers ## -def nvmlInit(): - global nvmlLib - global libLoadLock - - # - # Load the library if it isn't loaded already - # - if (nvmlLib == None): - # lock to ensure only one caller loads the library - libLoadLock.acquire() - - try: - # ensure the library still isn't loaded - if (nvmlLib == None): - try: - if (sys.platform[:3] == "win"): - # cdecl calling convention - nvmlLib = cdll.nvml - else: - # assume linux - nvmlLib = CDLL("libnvidia-ml.so") - except OSError as ose: - print(ose) - _nvmlCheckReturn(NVML_ERROR_DRIVER_NOT_LOADED) - 
if (nvmlLib == None): - print("Failed to load NVML") - _nvmlCheckReturn(NVML_ERROR_DRIVER_NOT_LOADED) - finally: - # lock is always freed - libLoadLock.release() - - # - # Initialize the library - # - fn = _nvmlGetFunctionPointer("nvmlInit") - ret = fn() - _nvmlCheckReturn(ret) - return None - -def nvmlShutdown(): - # - # Leave the library loaded, but shutdown the interface - # - fn = _nvmlGetFunctionPointer("nvmlShutdown") - ret = fn() - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlErrorString(result): - fn = _nvmlGetFunctionPointer("nvmlErrorString") - fn.restype = c_char_p # otherwise return is an int - ret = fn(result) - return ret - -# Added in 2.285 -def nvmlSystemGetNVMLVersion(): - c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetProcessName(pid): - c_name = create_string_buffer(1024) - fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") - ret = fn(c_uint(pid), c_name, c_uint(1024)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlSystemGetDriverVersion(): - c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") - ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlSystemGetHicVersion(): - c_count = c_uint(0) - hics = None - fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") - - # get the count - ret = fn(byref(c_count), None) - - # this should only fail with insufficient size - if ((ret != NVML_SUCCESS) and - (ret != NVML_ERROR_INSUFFICIENT_SIZE)): - raise NVMLError(ret) - - # if there are no hics - if (c_count.value == 0): - return [] - - hic_array = c_nvmlHwbcEntry_t * c_count.value - hics = hic_array() - ret = 
fn(byref(c_count), hics) - _nvmlCheckReturn(ret) - return hics - -## Unit get functions -def nvmlUnitGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetHandleByIndex(index): - c_index = c_uint(index) - unit = c_nvmlUnit_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") - ret = fn(c_index, byref(unit)) - _nvmlCheckReturn(ret) - return unit - -def nvmlUnitGetUnitInfo(unit): - c_info = c_nvmlUnitInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetLedState(unit): - c_state = c_nvmlLedState_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") - ret = fn(unit, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state - -def nvmlUnitGetPsuInfo(unit): - c_info = c_nvmlPSUInfo_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") - ret = fn(unit, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlUnitGetTemperature(unit, type): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") - ret = fn(unit, c_uint(type), byref(c_temp)) - _nvmlCheckReturn(ret) - return c_temp.value - -def nvmlUnitGetFanSpeedInfo(unit): - c_speeds = c_nvmlUnitFanSpeeds_t() - fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") - ret = fn(unit, byref(c_speeds)) - _nvmlCheckReturn(ret) - return c_speeds - -# added to API -def nvmlUnitGetDeviceCount(unit): - c_count = c_uint(0) - # query the unit to determine device count - fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), None) - if (ret == NVML_ERROR_INSUFFICIENT_SIZE): - ret = NVML_ERROR_SUCCESS - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlUnitGetDevices(unit): - c_count = c_uint(nvmlUnitGetDeviceCount(unit)) - device_array = c_nvmlDevice_t * c_count.value - c_devices = device_array() - fn = 
_nvmlGetFunctionPointer("nvmlUnitGetDevices") - ret = fn(unit, byref(c_count), c_devices) - _nvmlCheckReturn(ret) - return c_devices - -## Device get functions -def nvmlDeviceGetCount(): - c_count = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount") - ret = fn(byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetHandleByIndex(index): - c_index = c_uint(index) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex") - ret = fn(c_index, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleBySerial(serial): - c_serial = c_char_p(serial) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") - ret = fn(c_serial, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByUUID(uuid): - c_uuid = c_char_p(uuid) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") - ret = fn(c_uuid, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetHandleByPciBusId(pciBusId): - c_busId = c_char_p(pciBusId) - device = c_nvmlDevice_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId") - ret = fn(c_busId, byref(device)) - _nvmlCheckReturn(ret) - return device - -def nvmlDeviceGetName(handle): - c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") - ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_name.value - -def nvmlDeviceGetSerial(handle): - c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") - ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_serial.value - -def nvmlDeviceGetUUID(handle): - c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") - ret = fn(handle, c_uuid, 
c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_uuid.value - -def nvmlDeviceGetInforomVersion(handle, infoRomObject): - c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") - ret = fn(handle, _nvmlInforomObject_t(infoRomObject), - c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -def nvmlDeviceGetDisplayMode(handle): - c_mode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetPersistenceMode(handle): - c_state = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") - ret = fn(handle, byref(c_state)) - _nvmlCheckReturn(ret) - return c_state.value - -def nvmlDeviceGetPciInfo(handle): - c_info = nvmlPciInfo_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") - ret = fn(handle, byref(c_info)) - _nvmlCheckReturn(ret) - return c_info - -def nvmlDeviceGetClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -# Added in 2.285 -def nvmlDeviceGetMaxClockInfo(handle, type): - c_clock = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") - ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) - _nvmlCheckReturn(ret) - return c_clock.value - -def nvmlDeviceGetFanSpeed(handle): - c_speed = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") - ret = fn(handle, byref(c_speed)) - _nvmlCheckReturn(ret) - return c_speed.value - -def nvmlDeviceGetTemperature(handle, sensor): - c_temp = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") - ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) - _nvmlCheckReturn(ret) - return 
c_temp.value - -# DEPRECATED use nvmlDeviceGetPerformanceState -def nvmlDeviceGetPowerState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPerformanceState(handle): - c_pstate = _nvmlPstates_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") - ret = fn(handle, byref(c_pstate)) - _nvmlCheckReturn(ret) - return c_pstate.value - -def nvmlDeviceGetPowerManagementMode(handle): - c_pcapMode = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") - ret = fn(handle, byref(c_pcapMode)) - _nvmlCheckReturn(ret) - return c_pcapMode.value - -def nvmlDeviceGetPowerManagementLimit(handle): - c_limit = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") - ret = fn(handle, byref(c_limit)) - _nvmlCheckReturn(ret) - return c_limit.value - -def nvmlDeviceGetPowerUsage(handle): - c_watts = c_uint() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") - ret = fn(handle, byref(c_watts)) - _nvmlCheckReturn(ret) - return c_watts.value - -def nvmlDeviceGetMemoryInfo(handle): - c_memory = c_nvmlMemory_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") - ret = fn(handle, byref(c_memory)) - _nvmlCheckReturn(ret) - return c_memory - -def nvmlDeviceGetComputeMode(handle): - c_mode = _nvmlComputeMode_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") - ret = fn(handle, byref(c_mode)) - _nvmlCheckReturn(ret) - return c_mode.value - -def nvmlDeviceGetEccMode(handle): - c_currState = _nvmlEnableState_t() - c_pendingState = _nvmlEnableState_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") - ret = fn(handle, byref(c_currState), byref(c_pendingState)) - _nvmlCheckReturn(ret) - return [c_currState.value, c_pendingState.value] - -# added to API -def nvmlDeviceGetCurrentEccMode(handle): - return nvmlDeviceGetEccMode(handle)[0] - -# added to 
API -def nvmlDeviceGetPendingEccMode(handle): - return nvmlDeviceGetEccMode(handle)[1] - -def nvmlDeviceGetTotalEccErrors(handle, bitType, counterType): - c_count = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") - ret = fn(handle, _nvmlEccBitType_t(bitType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count.value - -def nvmlDeviceGetDetailedEccErrors(handle, bitType, counterType): - c_count = c_nvmlEccErrorCounts_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") - ret = fn(handle, _nvmlEccBitType_t(bitType), - _nvmlEccCounterType_t(counterType), byref(c_count)) - _nvmlCheckReturn(ret) - return c_count - -def nvmlDeviceGetUtilizationRates(handle): - c_util = c_nvmlUtilization_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") - ret = fn(handle, byref(c_util)) - _nvmlCheckReturn(ret) - return c_util - -def nvmlDeviceGetDriverModel(handle): - c_currModel = _nvmlDriverModel_t() - c_pendingModel = _nvmlDriverModel_t() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") - ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) - _nvmlCheckReturn(ret) - return [c_currModel.value, c_pendingModel.value] - -# added to API -def nvmlDeviceGetCurrentDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[0] - -# added to API -def nvmlDeviceGetPendingDriverModel(handle): - return nvmlDeviceGetDriverModel(handle)[1] - -# Added in 2.285 -def nvmlDeviceGetVbiosVersion(handle): - c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") - ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) - _nvmlCheckReturn(ret) - return c_version.value - -# Added in 2.285 -def nvmlDeviceGetComputeRunningProcesses(handle): - # first call to get the size - c_count = c_uint(0) - fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") - ret = fn(handle, 
byref(c_count), None) - - if (ret == NVML_SUCCESS): - # special case, no running processes - return [] - elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): - # typical case - # oversize the array incase more processes are created - c_count.value = c_count.value * 2 + 5 - proc_array = c_nvmlProcessInfo_t * c_count.value - c_procs = proc_array() - - # make the call again - ret = fn(handle, byref(c_count), c_procs) - _nvmlCheckReturn(ret) - - procs = [] - for i in range(c_count.value): - # use an alternative struct for this object - obj = nvmlStructToFriendlyObject(c_procs[i]) - if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): - # special case for WDDM on Windows, see comment above - obj.usedGpuMemory = None - procs.append(obj) - - return procs - else: - # error case - raise NVMLError(ret) - -## Set functions -def nvmlUnitSetLedState(unit, color): - fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") - ret = fn(unit, _nvmlLedColor_t(color)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetPersistenceMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetComputeMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") - ret = fn(handle, _nvmlComputeMode_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetEccMode(handle, mode): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") - ret = fn(handle, _nvmlEnableState_t(mode)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceClearEccErrorCounts(handle, counterType): - fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") - ret = fn(handle, _nvmlEccCounterType_t(counterType)) - _nvmlCheckReturn(ret) - return None - -def nvmlDeviceSetDriverModel(handle, model): - fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") - ret = fn(handle, _nvmlDriverModel_t(model)) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 
-def nvmlEventSetCreate(): - fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") - eventSet = c_nvmlEventSet_t() - ret = fn(byref(eventSet)) - _nvmlCheckReturn(ret) - return eventSet - -# Added in 2.285 -def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): - fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") - ret = fn(handle, c_ulonglong(eventTypes), eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlDeviceGetSupportedEventTypes(handle): - c_eventTypes = c_ulonglong() - fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") - ret = fn(handle, byref(c_eventTypes)) - _nvmlCheckReturn(ret) - return c_eventTypes.value - -# Added in 2.285 -# raises NVML_ERROR_TIMEOUT exception on timeout -def nvmlEventSetWait(eventSet, timeoutms): - fn = _nvmlGetFunctionPointer("nvmlEventSetWait") - data = c_nvmlEventData_t() - ret = fn(eventSet, byref(data), c_uint(timeoutms)) - _nvmlCheckReturn(ret) - return data - -# Added in 2.285 -def nvmlEventSetFree(eventSet): - fn = _nvmlGetFunctionPointer("nvmlEventSetFree") - ret = fn(eventSet) - _nvmlCheckReturn(ret) - return None - -# Added in 2.285 -def nvmlEventDataGetPerformanceState(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetPerformanceState") - pstate = _nvmlPstates_t() - ret = fn(byref(data), byref(pstate)) - _nvmlCheckReturn(ret) - return pstate.value - -# Added in 2.285 -def nvmlEventDataGetXidCriticalError(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetXidCriticalError") - xid = c_uint() - ret = fn(byref(data), byref(xid)) - _nvmlCheckReturn(ret) - return xid.value - -# Added in 2.285 -def nvmlEventDataGetEccErrorCount(data): - fn = _nvmlGetFunctionPointer("nvmlEventDataGetEccErrorCount") - ecc = c_ulonglong() - ret = fn(byref(data), byref(ecc)) - _nvmlCheckReturn(ret) - return ecc.value - -# Added in 3.295 -def nvmlDeviceOnSameBoard(handle1, handle2): - fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") - onSameBoard = c_int() - ret = fn(handle1, handle2, 
byref(onSameBoard)) - _nvmlCheckReturn(ret) - return (onSameBoard.value != 0) - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkGeneration(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") - gen = c_uint() - ret = fn(handle, byref(gen)) - _nvmlCheckReturn(ret) - return gen.value - -# Added in 3.295 -def nvmlDeviceGetCurrPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - -# Added in 3.295 -def nvmlDeviceGetMaxPcieLinkWidth(handle): - fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") - width = c_uint() - ret = fn(handle, byref(width)) - _nvmlCheckReturn(ret) - return width.value - - - diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/PKG-INFO b/gpu/nvidia/nvidia-ml-py-7.352.0/PKG-INFO similarity index 94% rename from gpu/nvidia/nvidia-ml-py-3.295.00/PKG-INFO rename to gpu/nvidia/nvidia-ml-py-7.352.0/PKG-INFO index 3c0212b9..cfe33490 100644 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/PKG-INFO +++ b/gpu/nvidia/nvidia-ml-py-7.352.0/PKG-INFO @@ -1,6 +1,6 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: nvidia-ml-py -Version: 3.295.00 +Version: 7.352.0 Summary: Python Bindings for the NVIDIA Management Library Home-page: http://www.nvidia.com/ Author: NVIDIA Corporation diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/README.txt b/gpu/nvidia/nvidia-ml-py-7.352.0/README.txt similarity index 87% rename from gpu/nvidia/nvidia-ml-py-3.295.00/README.txt rename to gpu/nvidia/nvidia-ml-py-7.352.0/README.txt index 4cfec876..946860fd 100644 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/README.txt +++ b/gpu/nvidia/nvidia-ml-py-7.352.0/README.txt @@ -32,13 +32,13 @@ USAGE >>> from pynvml import * 
>>> nvmlInit() >>> print "Driver Version:", nvmlSystemGetDriverVersion() - Driver Version: 295.00 + Driver Version: 352.00 >>> deviceCount = nvmlDeviceGetCount() >>> for i in range(deviceCount): ... handle = nvmlDeviceGetHandleByIndex(i) ... print "Device", i, ":", nvmlDeviceGetName(handle) ... - Device 0 : Tesla C2070 + Device 0 : Tesla K40c >>> nvmlShutdown() @@ -120,10 +120,23 @@ Version 3.295.0 - Added new functions for NVML 3.295. See NVML documentation for more information. - Updated nvidia_smi.py tool - Includes additional error handling +Version 4.304.0 +- Added new functions for NVML 4.304. See NVML documentation for more information. +- Updated nvidia_smi.py tool +Version 4.304.3 +- Fixing nvmlUnitGetDeviceCount bug +Version 5.319.0 +- Added new functions for NVML 5.319. See NVML documentation for more information. +Version 6.340.0 +- Added new functions for NVML 6.340. See NVML documentation for more information. +Version 7.346.0 +- Added new functions for NVML 7.346. See NVML documentation for more information. +Version 7.352.0 +- Added new functions for NVML 7.352. See NVML documentation for more information. COPYRIGHT --------- -Copyright (c) 2011-2012, NVIDIA Corporation. All rights reserved. +Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. LICENSE ------- diff --git a/gpu/nvidia/nvidia-ml-py-7.352.0/nvidia_smi.py b/gpu/nvidia/nvidia-ml-py-7.352.0/nvidia_smi.py new file mode 100644 index 00000000..62bec1d0 --- /dev/null +++ b/gpu/nvidia/nvidia-ml-py-7.352.0/nvidia_smi.py @@ -0,0 +1,873 @@ +##### +# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+##### + +# +# nvidia_smi +# nvml_bindings nvidia com +# +# Sample code that attempts to reproduce the output of nvidia-smi -q -x +# For many cases the output should match +# +# Can be used as a library or a command line script +# +# To Run: +# $ python nvidia_smi.py +# + +from pynvml import * +import datetime + +# +# Helper functions +# +def GetEccByType(handle, counterType, errorType): + strResult = '' + + try: + deviceMemory = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, + NVML_MEMORY_LOCATION_DEVICE_MEMORY) + except NVMLError as err: + deviceMemory = handleError(err) + strResult += ' ' + str(deviceMemory) + '\n' + + try: + registerFile = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, + NVML_MEMORY_LOCATION_REGISTER_FILE) + except NVMLError as err: + registerFile = handleError(err) + + strResult += ' ' + str(registerFile) + '\n' + + try: + l1Cache = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, + NVML_MEMORY_LOCATION_L1_CACHE) + except NVMLError as err: + l1Cache = handleError(err) + strResult += ' ' + str(l1Cache) + '\n' + + try: + l2Cache = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, + NVML_MEMORY_LOCATION_L2_CACHE) + except NVMLError as err: + l2Cache = handleError(err) + strResult += ' ' + str(l2Cache) + '\n' + + try: + textureMemory = nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, + NVML_MEMORY_LOCATION_TEXTURE_MEMORY) + except NVMLError as err: + textureMemory = handleError(err) + strResult += ' ' + str(textureMemory) + '\n' + + try: + count = str(nvmlDeviceGetTotalEccErrors(handle, errorType, counterType)) + except NVMLError as err: + count = handleError(err) + strResult += ' ' + count + '\n' + + return strResult + +def GetEccByCounter(handle, counterType): + strResult = '' + strResult += ' \n' + strResult += str(GetEccByType(handle, counterType, NVML_MEMORY_ERROR_TYPE_CORRECTED)) + strResult += ' \n' + strResult += ' \n' + strResult += str(GetEccByType(handle, 
counterType, NVML_MEMORY_ERROR_TYPE_UNCORRECTED)) + strResult += ' \n' + return strResult + +def GetEccStr(handle): + strResult = '' + strResult += ' \n' + strResult += str(GetEccByCounter(handle, NVML_VOLATILE_ECC)) + strResult += ' \n' + strResult += ' \n' + strResult += str(GetEccByCounter(handle, NVML_AGGREGATE_ECC)) + strResult += ' \n' + return strResult + +def GetRetiredPagesByCause(handle, cause): + strResult = '' + try: + pages = nvmlDeviceGetRetiredPages(handle, cause) + count = str(len(pages)) + except NVMLError as err: + error = handleError(err) + pages = None + count = error + strResult += ' ' + count + '\n' + if pages is not None: + strResult += ' \n' + for page in pages: + strResult += ' ' + "0x%016x" % page + '\n' + strResult += ' \n' + else: + strResult += ' ' + error + '\n' + return strResult + +def GetRetiredPagesStr(handle): + strResult = '' + causes = [ "multiple_single_bit_retirement", "double_bit_retirement" ] + for idx in range(NVML_PAGE_RETIREMENT_CAUSE_COUNT): + strResult += ' <' + causes[idx] + '>\n' + strResult += GetRetiredPagesByCause(handle, idx) + strResult += ' \n' + + strResult += ' ' + try: + if NVML_FEATURE_DISABLED == nvmlDeviceGetRetiredPagesPendingStatus(handle): + strResult += "No" + else: + strResult += "Yes" + except NVMLError as err: + strResult += handleError(err) + strResult += '\n' + return strResult + +def StrGOM(mode): + if mode == NVML_GOM_ALL_ON: + return "All On"; + elif mode == NVML_GOM_COMPUTE: + return "Compute"; + elif mode == NVML_GOM_LOW_DP: + return "Low Double Precision"; + else: + return "Unknown"; + +def GetClocksThrottleReasons(handle): + throttleReasons = [ + [nvmlClocksThrottleReasonGpuIdle, "clocks_throttle_reason_gpu_idle"], + [nvmlClocksThrottleReasonUserDefinedClocks, "clocks_throttle_reason_user_defined_clocks"], + [nvmlClocksThrottleReasonApplicationsClocksSetting, "clocks_throttle_reason_applications_clocks_setting"], + [nvmlClocksThrottleReasonSwPowerCap, "clocks_throttle_reason_sw_power_cap"], 
+ [nvmlClocksThrottleReasonHwSlowdown, "clocks_throttle_reason_hw_slowdown"], + [nvmlClocksThrottleReasonUnknown, "clocks_throttle_reason_unknown"] + ]; + + strResult = '' + + try: + supportedClocksThrottleReasons = nvmlDeviceGetSupportedClocksThrottleReasons(handle); + clocksThrottleReasons = nvmlDeviceGetCurrentClocksThrottleReasons(handle); + strResult += ' \n' + for (mask, name) in throttleReasons: + if (name != "clocks_throttle_reason_user_defined_clocks"): + if (mask & supportedClocksThrottleReasons): + val = "Active" if mask & clocksThrottleReasons else "Not Active"; + else: + val = "N/A";# handleError(NVML_ERROR_NOT_SUPPORTED); + strResult += " <%s>%s\n" % (name, val, name); + strResult += ' \n' + except NVMLError as err: + strResult += ' %s\n' % (handleError(err)); + + return strResult; + +# +# Converts errors into string messages +# +def handleError(err): + if (err.value == NVML_ERROR_NOT_SUPPORTED): + return "N/A" + else: + return err.__str__() + +####### +def XmlDeviceQuery(): + + strResult = '' + try: + # + # Initialize NVML + # + nvmlInit() + + strResult += '\n' + strResult += '\n' + strResult += '\n' + + strResult += ' ' + str(datetime.date.today()) + '\n' + strResult += ' ' + str(nvmlSystemGetDriverVersion()) + '\n' + + deviceCount = nvmlDeviceGetCount() + strResult += ' ' + str(deviceCount) + '\n' + + for i in range(0, deviceCount): + handle = nvmlDeviceGetHandleByIndex(i) + + pciInfo = nvmlDeviceGetPciInfo(handle) + + strResult += ' \n' % pciInfo.busId + + strResult += ' ' + nvmlDeviceGetName(handle) + '\n' + + brandNames = {NVML_BRAND_UNKNOWN : "Unknown", + NVML_BRAND_QUADRO : "Quadro", + NVML_BRAND_TESLA : "Tesla", + NVML_BRAND_NVS : "NVS", + NVML_BRAND_GRID : "Grid", + NVML_BRAND_GEFORCE : "GeForce", + } + + try: + # if nvmlDeviceGetBrand() succeeds it is guaranteed to be in the dictionary + brandName = brandNames[nvmlDeviceGetBrand(handle)] + except NVMLError as err: + brandName = handleError(err) + + + strResult += ' ' + brandName + '\n' + + 
try: + state = ('Enabled' if (nvmlDeviceGetDisplayMode(handle) != 0) else 'Disabled') + except NVMLError as err: + state = handleError(err) + + strResult += ' ' + state + '\n' + + try: + state = ('Enabled' if (nvmlDeviceGetDisplayActive(handle) != 0) else 'Disabled') + except NVMLError as err: + state = handleError(err) + + strResult += ' ' + state + '\n' + + try: + mode = 'Enabled' if (nvmlDeviceGetPersistenceMode(handle) != 0) else 'Disabled' + except NVMLError as err: + mode = handleError(err) + + strResult += ' ' + mode + '\n' + + try: + mode = 'Enabled' if (nvmlDeviceGetAccountingMode(handle) != 0) else 'Disabled' + except NVMLError as err: + mode = handleError(err) + + strResult += ' ' + mode + '\n' + + try: + bufferSize = str(nvmlDeviceGetAccountingBufferSize(handle)) + except NVMLError as err: + bufferSize = handleError(err) + + strResult += ' ' + bufferSize + '\n' + + strResult += ' \n' + + try: + current = 'WDDM' if (nvmlDeviceGetCurrentDriverModel(handle) == NVML_DRIVER_WDDM) else 'TCC' + except NVMLError as err: + current = handleError(err) + strResult += ' ' + current + '\n' + + try: + pending = 'WDDM' if (nvmlDeviceGetPendingDriverModel(handle) == NVML_DRIVER_WDDM) else 'TCC' + except NVMLError as err: + pending = handleError(err) + + strResult += ' ' + pending + '\n' + + strResult += ' \n' + + try: + serial = nvmlDeviceGetSerial(handle) + except NVMLError as err: + serial = handleError(err) + + strResult += ' ' + serial + '\n' + + try: + uuid = nvmlDeviceGetUUID(handle) + except NVMLError as err: + uuid = handleError(err) + + strResult += ' ' + uuid + '\n' + + try: + minor_number = nvmlDeviceGetMinorNumber(handle) + except NVMLError as err: + minor_number = handleError(err) + + strResult += ' ' + str(minor_number) + '\n' + + try: + vbios = nvmlDeviceGetVbiosVersion(handle) + except NVMLError as err: + vbios = handleError(err) + + strResult += ' ' + vbios + '\n' + + try: + multiGpuBool = nvmlDeviceGetMultiGpuBoard(handle) + except NVMLError as err: + 
multiGpuBool = handleError(err); + + if multiGpuBool == "N/A": + strResult += ' ' + 'N/A' + '\n' + elif multiGpuBool: + strResult += ' ' + 'Yes' + '\n' + else: + strResult += ' ' + 'No' + '\n' + + try: + boardId = nvmlDeviceGetBoardId(handle) + except NVMLError as err: + boardId = handleError(err) + + try: + hexBID = "0x%x" % boardId + except: + hexBID = boardId + + strResult += ' ' + hexBID + '\n' + + strResult += ' \n' + + try: + img = nvmlDeviceGetInforomImageVersion(handle) + except NVMLError as err: + img = handleError(err) + + strResult += ' ' + img + '\n' + + try: + oem = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_OEM) + except NVMLError as err: + oem = handleError(err) + + strResult += ' ' + oem + '\n' + + try: + ecc = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_ECC) + except NVMLError as err: + ecc = handleError(err) + + strResult += ' ' + ecc + '\n' + + try: + pwr = nvmlDeviceGetInforomVersion(handle, NVML_INFOROM_POWER) + except NVMLError as err: + pwr = handleError(err) + + strResult += ' ' + pwr + '\n' + + strResult += ' \n' + + strResult += ' \n' + + try: + current = StrGOM(nvmlDeviceGetCurrentGpuOperationMode(handle)) + except NVMLError as err: + current = handleError(err) + strResult += ' ' + current + '\n' + + try: + pending = StrGOM(nvmlDeviceGetPendingGpuOperationMode(handle)) + except NVMLError as err: + pending = handleError(err) + + strResult += ' ' + pending + '\n' + + strResult += ' \n' + + strResult += ' \n' + strResult += ' %02X\n' % pciInfo.bus + strResult += ' %02X\n' % pciInfo.device + strResult += ' %04X\n' % pciInfo.domain + strResult += ' %08X\n' % (pciInfo.pciDeviceId) + strResult += ' ' + str(pciInfo.busId) + '\n' + strResult += ' %08X\n' % (pciInfo.pciSubSystemId) + strResult += ' \n' + + + strResult += ' \n' + + try: + gen = str(nvmlDeviceGetMaxPcieLinkGeneration(handle)) + except NVMLError as err: + gen = handleError(err) + + strResult += ' ' + gen + '\n' + + try: + gen = 
str(nvmlDeviceGetCurrPcieLinkGeneration(handle)) + except NVMLError as err: + gen = handleError(err) + + strResult += ' ' + gen + '\n' + strResult += ' \n' + strResult += ' \n' + + try: + width = str(nvmlDeviceGetMaxPcieLinkWidth(handle)) + 'x' + except NVMLError as err: + width = handleError(err) + + strResult += ' ' + width + '\n' + + try: + width = str(nvmlDeviceGetCurrPcieLinkWidth(handle)) + 'x' + except NVMLError as err: + width = handleError(err) + + strResult += ' ' + width + '\n' + + strResult += ' \n' + strResult += ' \n' + + + strResult += ' \n' + + try: + bridgeHierarchy = nvmlDeviceGetBridgeChipInfo(handle) + bridge_type = '' + if bridgeHierarchy.bridgeChipInfo[0].type == 0: + bridge_type += 'PLX' + else: + bridge_type += 'BR04' + strResult += ' ' + bridge_type + '\n' + + if bridgeHierarchy.bridgeChipInfo[0].fwVersion == 0: + strFwVersion = 'N/A' + else: + strFwVersion = '%08X' % (bridgeHierarchy.bridgeChipInfo[0].fwVersion) + strResult += ' %s\n' % (strFwVersion) + except NVMLError as err: + strResult += ' ' + handleError(err) + '\n' + strResult += ' ' + handleError(err) + '\n' + + # Add additional code for hierarchy of bridges for Bug # 1382323 + strResult += ' \n' + + try: + replay = nvmlDeviceGetPcieReplayCounter(handle) + strResult += ' ' + str(replay) + '' + except NVMLError as err: + strResult += ' ' + handleError(err) + '' + + try: + tx_bytes = nvmlDeviceGetPcieThroughput(handle, NVML_PCIE_UTIL_TX_BYTES) + strResult += ' ' + str(tx_bytes) + ' KB/s' + '' + except NVMLError as err: + strResult += ' ' + handleError(err) + '' + + try: + rx_bytes = nvmlDeviceGetPcieThroughput(handle, NVML_PCIE_UTIL_RX_BYTES) + strResult += ' ' + str(rx_bytes) + ' KB/s' + '' + except NVMLError as err: + strResult += ' ' + handleError(err) + '' + + + strResult += ' \n' + + try: + fan = str(nvmlDeviceGetFanSpeed(handle)) + ' %' + except NVMLError as err: + fan = handleError(err) + strResult += ' ' + fan + '\n' + + try: + perfState = nvmlDeviceGetPowerState(handle) + 
perfStateStr = 'P%s' % perfState + except NVMLError as err: + perfStateStr = handleError(err) + strResult += ' ' + perfStateStr + '\n' + + strResult += GetClocksThrottleReasons(handle); + + try: + memInfo = nvmlDeviceGetMemoryInfo(handle) + mem_total = str(memInfo.total / 1024 / 1024) + ' MiB' + mem_used = str(memInfo.used / 1024 / 1024) + ' MiB' + mem_free = str(memInfo.total / 1024 / 1024 - memInfo.used / 1024 / 1024) + ' MiB' + except NVMLError as err: + error = handleError(err) + mem_total = error + mem_used = error + mem_free = error + + strResult += ' \n' + strResult += ' ' + mem_total + '\n' + strResult += ' ' + mem_used + '\n' + strResult += ' ' + mem_free + '\n' + strResult += ' \n' + + try: + memInfo = nvmlDeviceGetBAR1MemoryInfo(handle) + mem_total = str(memInfo.bar1Total / 1024 / 1024) + ' MiB' + mem_used = str(memInfo.bar1Used / 1024 / 1024) + ' MiB' + mem_free = str(memInfo.bar1Total / 1024 / 1024 - memInfo.bar1Used / 1024 / 1024) + ' MiB' + except NVMLError as err: + error = handleError(err) + mem_total = error + mem_used = error + mem_free = error + + strResult += ' \n' + strResult += ' ' + mem_total + '\n' + strResult += ' ' + mem_used + '\n' + strResult += ' ' + mem_free + '\n' + strResult += ' \n' + + try: + mode = nvmlDeviceGetComputeMode(handle) + if mode == NVML_COMPUTEMODE_DEFAULT: + modeStr = 'Default' + elif mode == NVML_COMPUTEMODE_EXCLUSIVE_THREAD: + modeStr = 'Exclusive Thread' + elif mode == NVML_COMPUTEMODE_PROHIBITED: + modeStr = 'Prohibited' + elif mode == NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: + modeStr = 'Exclusive_Process' + else: + modeStr = 'Unknown' + except NVMLError as err: + modeStr = handleError(err) + + strResult += ' ' + modeStr + '\n' + + try: + util = nvmlDeviceGetUtilizationRates(handle) + gpu_util = str(util.gpu) + ' %' + mem_util = str(util.memory) + ' %' + except NVMLError as err: + error = handleError(err) + gpu_util = error + mem_util = error + + strResult += ' \n' + strResult += ' ' + gpu_util + '\n' + strResult += 
' ' + mem_util + '\n' + + try: + (util_int, ssize) = nvmlDeviceGetEncoderUtilization(handle) + encoder_util = str(util_int) + ' %' + except NVMLError as err: + error = handleError(err) + encoder_util = error + + strResult += ' ' + encoder_util + '\n' + + try: + (util_int, ssize) = nvmlDeviceGetDecoderUtilization(handle) + decoder_util = str(util_int) + ' %' + except NVMLError as err: + error = handleError(err) + decoder_util = error + + strResult += ' ' + decoder_util + '\n' + + strResult += ' \n' + + try: + (current, pending) = nvmlDeviceGetEccMode(handle) + curr_str = 'Enabled' if (current != 0) else 'Disabled' + pend_str = 'Enabled' if (pending != 0) else 'Disabled' + except NVMLError as err: + error = handleError(err) + curr_str = error + pend_str = error + + strResult += ' \n' + strResult += ' ' + curr_str + '\n' + strResult += ' ' + pend_str + '\n' + strResult += ' \n' + + strResult += ' \n' + strResult += GetEccStr(handle) + strResult += ' \n' + + strResult += ' \n' + strResult += GetRetiredPagesStr(handle) + strResult += ' \n' + + try: + temp = str(nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU)) + ' C' + except NVMLError as err: + temp = handleError(err) + + strResult += ' \n' + strResult += ' ' + temp + '\n' + + try: + temp = str(nvmlDeviceGetTemperatureThreshold(handle, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN)) + ' C' + except NVMLError as err: + temp = handleError(err) + + strResult += ' ' + temp + '\n' + + try: + temp = str(nvmlDeviceGetTemperatureThreshold(handle, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN)) + ' C' + except NVMLError as err: + temp = handleError(err) + + strResult += ' ' + temp + '\n' + strResult += ' \n' + + strResult += ' \n' + try: + perfState = 'P' + str(nvmlDeviceGetPowerState(handle)) + except NVMLError as err: + perfState = handleError(err) + strResult += ' %s\n' % perfState + try: + powMan = nvmlDeviceGetPowerManagementMode(handle) + powManStr = 'Supported' if powMan != 0 else 'N/A' + except NVMLError as err: + powManStr = 
handleError(err) + strResult += ' ' + powManStr + '\n' + try: + powDraw = (nvmlDeviceGetPowerUsage(handle) / 1000.0) + powDrawStr = '%.2f W' % powDraw + except NVMLError as err: + powDrawStr = handleError(err) + strResult += ' ' + powDrawStr + '\n' + try: + powLimit = (nvmlDeviceGetPowerManagementLimit(handle) / 1000.0) + powLimitStr = '%.2f W' % powLimit + except NVMLError as err: + powLimitStr = handleError(err) + strResult += ' ' + powLimitStr + '\n' + try: + powLimit = (nvmlDeviceGetPowerManagementDefaultLimit(handle) / 1000.0) + powLimitStr = '%.2f W' % powLimit + except NVMLError as err: + powLimitStr = handleError(err) + strResult += ' ' + powLimitStr + '\n' + + try: + enforcedPowLimit = (nvmlDeviceGetEnforcedPowerLimit(handle) / 1000.0) + enforcedPowLimitStr = '%.2f W' % enforcedPowLimit + except NVMLError as err: + enforcedPowLimitStr = handleError(err) + + strResult += ' ' + enforcedPowLimitStr + '\n' + + try: + powLimit = nvmlDeviceGetPowerManagementLimitConstraints(handle) + powLimitStrMin = '%.2f W' % (powLimit[0] / 1000.0) + powLimitStrMax = '%.2f W' % (powLimit[1] / 1000.0) + except NVMLError as err: + error = handleError(err) + powLimitStrMin = error + powLimitStrMax = error + strResult += ' ' + powLimitStrMin + '\n' + strResult += ' ' + powLimitStrMax + '\n' + + strResult += ' \n' + + strResult += ' \n' + try: + graphics = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_GRAPHICS)) + ' MHz' + except NVMLError as err: + graphics = handleError(err) + strResult += ' ' +graphics + '\n' + try: + sm = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_SM)) + ' MHz' + except NVMLError as err: + sm = handleError(err) + strResult += ' ' + sm + '\n' + try: + mem = str(nvmlDeviceGetClockInfo(handle, NVML_CLOCK_MEM)) + ' MHz' + except NVMLError as err: + mem = handleError(err) + strResult += ' ' + mem + '\n' + strResult += ' \n' + + strResult += ' \n' + try: + graphics = str(nvmlDeviceGetApplicationsClock(handle, NVML_CLOCK_GRAPHICS)) + ' MHz' + except NVMLError as 
err: + graphics = handleError(err) + strResult += ' ' +graphics + '\n' + try: + mem = str(nvmlDeviceGetApplicationsClock(handle, NVML_CLOCK_MEM)) + ' MHz' + except NVMLError as err: + mem = handleError(err) + strResult += ' ' + mem + '\n' + strResult += ' \n' + + strResult += ' \n' + try: + graphics = str(nvmlDeviceGetDefaultApplicationsClock(handle, NVML_CLOCK_GRAPHICS)) + ' MHz' + except NVMLError as err: + graphics = handleError(err) + strResult += ' ' +graphics + '\n' + try: + mem = str(nvmlDeviceGetDefaultApplicationsClock(handle, NVML_CLOCK_MEM)) + ' MHz' + except NVMLError as err: + mem = handleError(err) + strResult += ' ' + mem + '\n' + strResult += ' \n' + + strResult += ' \n' + try: + graphics = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_GRAPHICS)) + ' MHz' + except NVMLError as err: + graphics = handleError(err) + strResult += ' ' + graphics + '\n' + try: + sm = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_SM)) + ' MHz' + except NVMLError as err: + sm = handleError(err) + strResult += ' ' + sm + '\n' + try: + mem = str(nvmlDeviceGetMaxClockInfo(handle, NVML_CLOCK_MEM)) + ' MHz' + except NVMLError as err: + mem = handleError(err) + strResult += ' ' + mem + '\n' + strResult += ' \n' + + strResult += ' \n' + try: + boostedState, boostedDefaultState = nvmlDeviceGetAutoBoostedClocksEnabled(handle) + if boostedState == NVML_FEATURE_DISABLED: + autoBoostStr = "Off" + else: + autoBoostStr = "On" + + if boostedDefaultState == NVML_FEATURE_DISABLED: + autoBoostDefaultStr = "Off" + else: + autoBoostDefaultStr = "On" + + except NVMLError_NotSupported: + autoBoostStr = "N/A" + autoBoostDefaultStr = "N/A" + except NVMLError as err: + autoBoostStr = handleError(err) + autoBoostDefaultStr = handleError(err) + pass + strResult += ' ' + autoBoostStr + '\n' + strResult += ' ' + autoBoostDefaultStr + '\n' + strResult += ' \n' + + try: + memClocks = nvmlDeviceGetSupportedMemoryClocks(handle) + strResult += ' \n' + + for m in memClocks: + strResult += ' \n' + 
strResult += ' %d MHz\n' % m + try: + clocks = nvmlDeviceGetSupportedGraphicsClocks(handle, m) + for c in clocks: + strResult += ' %d MHz\n' % c + except NVMLError as err: + strResult += ' %s\n' % handleError(err) + strResult += ' \n' + + strResult += ' \n' + except NVMLError as err: + strResult += ' ' + handleError(err) + '\n' + + try: + procs = nvmlDeviceGetComputeRunningProcesses(handle) + strResult += ' \n' + + for p in procs: + try: + name = str(nvmlSystemGetProcessName(p.pid)) + except NVMLError as err: + if (err.value == NVML_ERROR_NOT_FOUND): + # probably went away + continue + else: + name = handleError(err) + + strResult += ' \n' + strResult += ' %d\n' % p.pid + strResult += ' ' + name + '\n' + + if (p.usedGpuMemory == None): + mem = 'N\A' + else: + mem = '%d MiB' % (p.usedGpuMemory / 1024 / 1024) + strResult += ' ' + mem + '\n' + strResult += ' \n' + + strResult += ' \n' + except NVMLError as err: + strResult += ' ' + handleError(err) + '\n' + + + try: + pids = nvmlDeviceGetAccountingPids(handle) + strResult += ' \n' + + for pid in pids : + try: + stats = nvmlDeviceGetAccountingStats(handle, pid) + gpuUtilization = "%d %%" % stats.gpuUtilization + memoryUtilization = "%d %%" % stats.memoryUtilization + if (stats.maxMemoryUsage == None): + maxMemoryUsage = 'N\A' + else: + maxMemoryUsage = '%d MiB' % (stats.maxMemoryUsage / 1024 / 1024) + time = "%d ms" % stats.time + is_running = "%d" % stats.isRunning + except NVMLError as err: + if (err.value == NVML_ERROR_NOT_FOUND): + # probably went away + continue + err = handleError(err) + gpuUtilization = err + memoryUtilization = err + maxMemoryUsage = err + time = err + is_running = err + + strResult += ' \n' + strResult += ' %d\n' % pid + strResult += ' ' + gpuUtilization + '\n' + strResult += ' ' + memoryUtilization + '\n' + strResult += ' ' + maxMemoryUsage+ '\n' + strResult += ' \n' + strResult += ' ' + is_running + '\n' + strResult += ' \n' + + strResult += ' \n' + except NVMLError as err: + strResult += ' 
' + handleError(err) + '\n' + + strResult += ' \n' + + strResult += '\n' + + except NVMLError as err: + strResult += 'nvidia_smi.py: ' + err.__str__() + '\n' + + nvmlShutdown() + + return strResult + +# this is not exectued when module is imported +if __name__ == "__main__": + print(XmlDeviceQuery()) diff --git a/gpu/nvidia/nvidia-ml-py-7.352.0/pynvml.py b/gpu/nvidia/nvidia-ml-py-7.352.0/pynvml.py new file mode 100644 index 00000000..fa67e55a --- /dev/null +++ b/gpu/nvidia/nvidia-ml-py-7.352.0/pynvml.py @@ -0,0 +1,1701 @@ +##### +# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +##### + +## +# Python bindings for the NVML library +## +from ctypes import * +from ctypes.util import find_library +import sys +import os +import threading +import string + +## C Type mappings ## +## Enums +_nvmlEnableState_t = c_uint +NVML_FEATURE_DISABLED = 0 +NVML_FEATURE_ENABLED = 1 + +_nvmlBrandType_t = c_uint +NVML_BRAND_UNKNOWN = 0 +NVML_BRAND_QUADRO = 1 +NVML_BRAND_TESLA = 2 +NVML_BRAND_NVS = 3 +NVML_BRAND_GRID = 4 +NVML_BRAND_GEFORCE = 5 +NVML_BRAND_COUNT = 6 + +_nvmlTemperatureThresholds_t = c_uint +NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 +NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 +NVML_TEMPERATURE_THRESHOLD_COUNT = 1 + +_nvmlTemperatureSensors_t = c_uint +NVML_TEMPERATURE_GPU = 0 +NVML_TEMPERATURE_COUNT = 1 + +_nvmlComputeMode_t = c_uint +NVML_COMPUTEMODE_DEFAULT = 0 +NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 +NVML_COMPUTEMODE_PROHIBITED = 2 +NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 +NVML_COMPUTEMODE_COUNT = 4 + +_nvmlMemoryLocation_t = c_uint +NVML_MEMORY_LOCATION_L1_CACHE = 0 +NVML_MEMORY_LOCATION_L2_CACHE = 1 +NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 +NVML_MEMORY_LOCATION_REGISTER_FILE = 3 +NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 +NVML_MEMORY_LOCATION_COUNT = 5 + +# These are deprecated, instead use _nvmlMemoryErrorType_t +_nvmlEccBitType_t = c_uint +NVML_SINGLE_BIT_ECC = 0 +NVML_DOUBLE_BIT_ECC = 1 +NVML_ECC_ERROR_TYPE_COUNT = 2 + +_nvmlEccCounterType_t = c_uint +NVML_VOLATILE_ECC = 0 +NVML_AGGREGATE_ECC = 1 
# Final member of the ECC counter-type enum whose earlier members precede
# this point in the file.
NVML_ECC_COUNTER_TYPE_COUNT = 2

_nvmlMemoryErrorType_t = c_uint
NVML_MEMORY_ERROR_TYPE_CORRECTED = 0
NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1
NVML_MEMORY_ERROR_TYPE_COUNT = 2

_nvmlClockType_t = c_uint
NVML_CLOCK_GRAPHICS = 0
NVML_CLOCK_SM = 1
NVML_CLOCK_MEM = 2
NVML_CLOCK_COUNT = 3

_nvmlDriverModel_t = c_uint
NVML_DRIVER_WDDM = 0
NVML_DRIVER_WDM = 1

_nvmlPstates_t = c_uint
NVML_PSTATE_0 = 0
NVML_PSTATE_1 = 1
NVML_PSTATE_2 = 2
NVML_PSTATE_3 = 3
NVML_PSTATE_4 = 4
NVML_PSTATE_5 = 5
NVML_PSTATE_6 = 6
NVML_PSTATE_7 = 7
NVML_PSTATE_8 = 8
NVML_PSTATE_9 = 9
NVML_PSTATE_10 = 10
NVML_PSTATE_11 = 11
NVML_PSTATE_12 = 12
NVML_PSTATE_13 = 13
NVML_PSTATE_14 = 14
NVML_PSTATE_15 = 15
NVML_PSTATE_UNKNOWN = 32

_nvmlInforomObject_t = c_uint
NVML_INFOROM_OEM = 0
NVML_INFOROM_ECC = 1
NVML_INFOROM_POWER = 2
NVML_INFOROM_COUNT = 3

# Return codes. Every NVML_ERROR_* name defined here is turned into an
# NVMLError_* exception subclass by _extractNVMLErrorsAsClasses() below.
_nvmlReturn_t = c_uint
NVML_SUCCESS = 0
NVML_ERROR_UNINITIALIZED = 1
NVML_ERROR_INVALID_ARGUMENT = 2
NVML_ERROR_NOT_SUPPORTED = 3
NVML_ERROR_NO_PERMISSION = 4
NVML_ERROR_ALREADY_INITIALIZED = 5
NVML_ERROR_NOT_FOUND = 6
NVML_ERROR_INSUFFICIENT_SIZE = 7
NVML_ERROR_INSUFFICIENT_POWER = 8
NVML_ERROR_DRIVER_NOT_LOADED = 9
NVML_ERROR_TIMEOUT = 10
NVML_ERROR_IRQ_ISSUE = 11
NVML_ERROR_LIBRARY_NOT_FOUND = 12
NVML_ERROR_FUNCTION_NOT_FOUND = 13
NVML_ERROR_CORRUPTED_INFOROM = 14
NVML_ERROR_GPU_IS_LOST = 15
NVML_ERROR_RESET_REQUIRED = 16
NVML_ERROR_OPERATING_SYSTEM = 17
NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18
NVML_ERROR_UNKNOWN = 999

_nvmlFanState_t = c_uint
NVML_FAN_NORMAL = 0
NVML_FAN_FAILED = 1

_nvmlLedColor_t = c_uint
NVML_LED_COLOR_GREEN = 0
NVML_LED_COLOR_AMBER = 1

_nvmlGpuOperationMode_t = c_uint
NVML_GOM_ALL_ON = 0
NVML_GOM_COMPUTE = 1
NVML_GOM_LOW_DP = 2

_nvmlPageRetirementCause_t = c_uint
NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0
NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1
NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2

_nvmlRestrictedAPI_t = c_uint
NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0
NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1
NVML_RESTRICTED_API_COUNT = 2

_nvmlBridgeChipType_t = c_uint
NVML_BRIDGE_CHIP_PLX = 0
NVML_BRIDGE_CHIP_BRO4 = 1
NVML_MAX_PHYSICAL_BRIDGE = 128

_nvmlValueType_t = c_uint
NVML_VALUE_TYPE_DOUBLE = 0
NVML_VALUE_TYPE_UNSIGNED_INT = 1
NVML_VALUE_TYPE_UNSIGNED_LONG = 2
NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3
NVML_VALUE_TYPE_COUNT = 4

_nvmlPerfPolicyType_t = c_uint
NVML_PERF_POLICY_POWER = 0
NVML_PERF_POLICY_THERMAL = 1
NVML_PERF_POLICY_COUNT = 2

_nvmlSamplingType_t = c_uint
NVML_TOTAL_POWER_SAMPLES = 0
NVML_GPU_UTILIZATION_SAMPLES = 1
NVML_MEMORY_UTILIZATION_SAMPLES = 2
NVML_ENC_UTILIZATION_SAMPLES = 3
NVML_DEC_UTILIZATION_SAMPLES = 4
NVML_PROCESSOR_CLK_SAMPLES = 5
NVML_MEMORY_CLK_SAMPLES = 6
NVML_SAMPLINGTYPE_COUNT = 7

_nvmlPcieUtilCounter_t = c_uint
NVML_PCIE_UTIL_TX_BYTES = 0
NVML_PCIE_UTIL_RX_BYTES = 1
NVML_PCIE_UTIL_COUNT = 2

_nvmlGpuTopologyLevel_t = c_uint
NVML_TOPOLOGY_INTERNAL = 0
NVML_TOPOLOGY_SINGLE = 10
NVML_TOPOLOGY_MULTIPLE = 20
NVML_TOPOLOGY_HOSTBRIDGE = 30
NVML_TOPOLOGY_CPU = 40
NVML_TOPOLOGY_SYSTEM = 50

# C preprocessor defined values
nvmlFlagDefault = 0
nvmlFlagForce = 1

# buffer size
NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16
NVML_DEVICE_UUID_BUFFER_SIZE = 80
NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81
NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80
NVML_DEVICE_NAME_BUFFER_SIZE = 64
NVML_DEVICE_SERIAL_BUFFER_SIZE = 30
NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32
NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16

NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1)
NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1)

## Lib loading ##
nvmlLib = None
libLoadLock = threading.Lock()
_nvmlLib_refcount = 0  # Incremented on each nvmlInit and decremented on nvmlShutdown


## Error Checking ##
class NVMLError(Exception):
    """Exception carrying an NVML return code in its ``value`` attribute.

    Constructing ``NVMLError(code)`` yields an instance of the matching
    ``NVMLError_*`` subclass when one has been registered in
    ``_valClassMapping`` (see ``_extractNVMLErrorsAsClasses``), so callers
    can catch a specific error class instead of inspecting ``value``.
    """
    _valClassMapping = dict()
    # List of currently known error codes
    _errcode_to_string = {
        NVML_ERROR_UNINITIALIZED: "Uninitialized",
        NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument",
        NVML_ERROR_NOT_SUPPORTED: "Not Supported",
        NVML_ERROR_NO_PERMISSION: "Insufficient Permissions",
        NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized",
        NVML_ERROR_NOT_FOUND: "Not Found",
        NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size",
        NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power",
        NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded",
        NVML_ERROR_TIMEOUT: "Timeout",
        NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue",
        NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found",
        NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found",
        NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM",
        NVML_ERROR_GPU_IS_LOST: "GPU is lost",
        NVML_ERROR_RESET_REQUIRED: "GPU requires restart",
        NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the request.",
        NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.",
        NVML_ERROR_UNKNOWN: "Unknown Error",
    }

    def __new__(typ, value):
        '''
        Maps value to a proper subclass of NVMLError.
        See _extractNVMLErrorsAsClasses function for more details
        '''
        if typ is NVMLError:
            typ = NVMLError._valClassMapping.get(value, typ)
        obj = Exception.__new__(typ)
        obj.value = value
        return obj

    def __str__(self):
        """Return the message for ``self.value``.

        Codes missing from ``_errcode_to_string`` are resolved once through
        ``nvmlErrorString`` and cached in the table. If that lookup raises
        ``NVMLError_Uninitialized`` a generic message with the numeric code
        is returned instead.
        """
        try:
            if self.value not in NVMLError._errcode_to_string:
                message = nvmlErrorString(self.value)
                # A bytes result would render as "b'...'" through str() on
                # Python 3, so decode it; on Python 2 bytes is str and the
                # value passes through str() unchanged.
                if isinstance(message, bytes) and not isinstance(message, str):
                    message = message.decode("utf-8", "replace")
                NVMLError._errcode_to_string[self.value] = str(message)
            return NVMLError._errcode_to_string[self.value]
        except NVMLError_Uninitialized:
            return "NVML Error with code %d" % self.value

    def __eq__(self, other):
        """Compare by error code; objects without ``value`` are not equal."""
        try:
            return self.value == other.value
        except AttributeError:
            # Comparing with None or any unrelated object used to raise.
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3.
        return hash(self.value)


def _extractNVMLErrorsAsClasses():
    '''
    Generates a hierarchy of classes on top of NVMLError class.

    Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate
    exceptions more easily.

    NVMLError is a parent class. Each NVML_ERROR_* gets its own subclass.
    e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized
    '''
    this_module = sys.modules[__name__]

    def gen_new(val):
        # Bind the error code now so each subclass can be built without args.
        def new(typ):
            obj = NVMLError.__new__(typ, val)
            return obj
        return new

    # Snapshot the names first: the loop adds attributes to this module.
    nvmlErrorsNames = [x for x in dir(this_module) if x.startswith("NVML_ERROR_")]
    for err_name in nvmlErrorsNames:
        # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized
        class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "")
        err_val = getattr(this_module, err_name)
        new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)})
        new_error_class.__module__ = __name__
        setattr(this_module, class_name, new_error_class)
        NVMLError._valClassMapping[err_val] = new_error_class
_extractNVMLErrorsAsClasses()


def _nvmlCheckReturn(ret):
    """Return ``ret`` unchanged, raising NVMLError unless it is NVML_SUCCESS."""
    if ret != NVML_SUCCESS:
        raise NVMLError(ret)
    return ret


## Function access ##
_nvmlGetFunctionPointer_cache = dict()  # function pointers are cached to prevent unnecessary libLoadLock locking
def _nvmlGetFunctionPointer(name):
    """Look up attribute ``name`` on the loaded library, caching the result.

    Raises NVMLError(NVML_ERROR_UNINITIALIZED) while ``nvmlLib`` is None and
    NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) when the library has no such
    attribute.
    """
    if name in _nvmlGetFunctionPointer_cache:
        return _nvmlGetFunctionPointer_cache[name]

    # The context manager releases the lock on every exit path.
    with libLoadLock:
        # ensure library was loaded
        if nvmlLib is None:
            raise NVMLError(NVML_ERROR_UNINITIALIZED)
        try:
            _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name)
            return _nvmlGetFunctionPointer_cache[name]
        except AttributeError:
            raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND)


## Alternative object
# Allows the object to be printed
# Allows mismatched types to be assigned
#  - like None when the Structure variant requires c_uint
class nvmlFriendlyObject(object):
    """Plain attribute bag built from a dictionary."""

    def __init__(self, dictionary):
        for x in dictionary:
            setattr(self, x, dictionary[x])

    def __str__(self):
        return self.__dict__.__str__()


def nvmlStructToFriendlyObject(struct):
    """Copy every field listed in ``struct._fields_`` into an nvmlFriendlyObject."""
    d = {}
    for x in struct._fields_:
        key = x[0]
        value = getattr(struct, key)
        d[key] = value
    obj = nvmlFriendlyObject(d)
    return obj


# pack the object so it can be passed to the NVML library
def nvmlFriendlyObjectToStruct(obj, model):
    """Assign each field of ``model`` from ``obj`` and return ``model``.

    Raises KeyError when ``obj`` lacks one of the fields in ``model._fields_``.
    """
    for x in model._fields_:
        key = x[0]
        value = obj.__dict__[key]
        setattr(model, key, value)
    return model


## Unit structures
class struct_c_nvmlUnit_t(Structure):
    pass  # opaque handle
c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t)


class _PrintableStructure(Structure):
    """
    Abstract class that produces nicer __str__ output than ctypes.Structure.
    Instead of the default object representation this class prints
      class_name(field_name: formatted_value, field_name: formatted_value)

    _fmt_ is a dictionary mapping a field name to a %-format string,
    e.g. class that has _field_ 'hex_value', c_uint could be formatted with
      _fmt_ = {"hex_value" : "%08X"}
    to produce nicer output.
    Default formatting string for all fields can be set with the key "" like:
      _fmt_ = {"" : "%d MHz"} # e.g all values are numbers in MHz.
    If not set it's assumed to be just "%s"

    Exact format of returned str from this class is subject to change in the future.
    """
    _fmt_ = {}

    def __str__(self):
        result = []
        for x in self._fields_:
            key = x[0]
            value = getattr(self, key)
            fmt = "%s"
            if key in self._fmt_:
                fmt = self._fmt_[key]
            elif "" in self._fmt_:
                fmt = self._fmt_[""]
            result.append(("%s: " + fmt) % (key, value))
        # str.join works on Python 2 and 3; string.join() is Python 2 only.
        return self.__class__.__name__ + "(" + ", ".join(result) + ")"


class c_nvmlUnitInfo_t(_PrintableStructure):
    _fields_ = [
        ('name', c_char * 96),
        ('id', c_char * 96),
        ('serial', c_char * 96),
        ('firmwareVersion', c_char * 96),
    ]


class c_nvmlLedState_t(_PrintableStructure):
    _fields_ = [
        ('cause', c_char * 256),
        ('color', _nvmlLedColor_t),
    ]


class c_nvmlPSUInfo_t(_PrintableStructure):
    _fields_ = [
        ('state', c_char * 256),
        ('current', c_uint),
        ('voltage', c_uint),
        ('power', c_uint),
    ]


class c_nvmlUnitFanInfo_t(_PrintableStructure):
    _fields_ = [
        ('speed', c_uint),
        ('state', _nvmlFanState_t),
    ]


class c_nvmlUnitFanSpeeds_t(_PrintableStructure):
    _fields_ = [
        ('fans', c_nvmlUnitFanInfo_t * 24),
        ('count', c_uint)
    ]


## Device structures
class struct_c_nvmlDevice_t(Structure):
    pass  # opaque handle
c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t)


class nvmlPciInfo_t(_PrintableStructure):
    _fields_ = [
        ('busId', c_char * 16),
        ('domain', c_uint),
        ('bus', c_uint),
        ('device', c_uint),
        ('pciDeviceId', c_uint),

        # Added in 2.285
        ('pciSubSystemId', c_uint),
        ('reserved0', c_uint),
        ('reserved1', c_uint),
        ('reserved2', c_uint),
        ('reserved3', c_uint),
    ]
    _fmt_ = {
            'domain'         : "0x%04X",
            'bus'            : "0x%02X",
            'device'         : "0x%02X",
            'pciDeviceId'    : "0x%08X",
            'pciSubSystemId' : "0x%08X",
            }


class c_nvmlMemory_t(_PrintableStructure):
    _fields_ = [
        ('total', c_ulonglong),
        ('free', c_ulonglong),
        ('used', c_ulonglong),
    ]
    _fmt_ = {'': "%d B"}


class c_nvmlBAR1Memory_t(_PrintableStructure):
    _fields_ = [
        ('bar1Total', c_ulonglong),
        ('bar1Free', c_ulonglong),
        ('bar1Used', c_ulonglong),
    ]
    _fmt_ = {'': "%d B"}


# On Windows with the WDDM driver, usedGpuMemory is reported as None
# Code that processes this structure should check for None, I.E.
#
# if (info.usedGpuMemory == None):
#     # TODO handle the error
#     pass
# else:
#    print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024))
#
# See NVML documentation for more information
class c_nvmlProcessInfo_t(_PrintableStructure):
    _fields_ = [
        ('pid', c_uint),
        ('usedGpuMemory', c_ulonglong),
    ]
    _fmt_ = {'usedGpuMemory': "%d B"}


class c_nvmlBridgeChipInfo_t(_PrintableStructure):
    _fields_ = [
        ('type', _nvmlBridgeChipType_t),
        ('fwVersion', c_uint),
    ]


class c_nvmlBridgeChipHierarchy_t(_PrintableStructure):
    _fields_ = [
        ('bridgeCount', c_uint),
        ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128),
    ]


class c_nvmlEccErrorCounts_t(_PrintableStructure):
    _fields_ = [
        ('l1Cache', c_ulonglong),
        ('l2Cache', c_ulonglong),
        ('deviceMemory', c_ulonglong),
        ('registerFile', c_ulonglong),
    ]


class c_nvmlUtilization_t(_PrintableStructure):
    _fields_ = [
        ('gpu', c_uint),
        ('memory', c_uint),
    ]
    _fmt_ = {'': "%d %%"}


# Added in 2.285
class c_nvmlHwbcEntry_t(_PrintableStructure):
    _fields_ = [
        ('hwbcId', c_uint),
        ('firmwareVersion', c_char * 32),
    ]


class c_nvmlValue_t(Union):
    _fields_ = [
        ('dVal', c_double),
        ('uiVal', c_uint),
        ('ulVal', c_ulong),
        ('ullVal', c_ulonglong),
    ]


class c_nvmlSample_t(_PrintableStructure):
    _fields_ = [
        ('timeStamp', c_ulonglong),
        ('sampleValue', c_nvmlValue_t),
    ]


class c_nvmlViolationTime_t(_PrintableStructure):
    _fields_ = [
        ('referenceTime', c_ulonglong),
        ('violationTime', c_ulonglong),
    ]


## Event structures
class struct_c_nvmlEventSet_t(Structure):
    pass  # opaque handle
c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t)

nvmlEventTypeSingleBitEccError = 0x0000000000000001
nvmlEventTypeDoubleBitEccError = 0x0000000000000002
nvmlEventTypePState = 0x0000000000000004
+nvmlEventTypeXidCriticalError = 0x0000000000000008 +nvmlEventTypeClock = 0x0000000000000010 +nvmlEventTypeNone = 0x0000000000000000 +nvmlEventTypeAll = ( + nvmlEventTypeNone | + nvmlEventTypeSingleBitEccError | + nvmlEventTypeDoubleBitEccError | + nvmlEventTypePState | + nvmlEventTypeClock | + nvmlEventTypeXidCriticalError + ) + +## Clock Throttle Reasons defines +nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 +nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 +nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting +nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 +nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 +nvmlClocksThrottleReasonUnknown = 0x8000000000000000 +nvmlClocksThrottleReasonNone = 0x0000000000000000 +nvmlClocksThrottleReasonAll = ( + nvmlClocksThrottleReasonNone | + nvmlClocksThrottleReasonGpuIdle | + nvmlClocksThrottleReasonApplicationsClocksSetting | + nvmlClocksThrottleReasonSwPowerCap | + nvmlClocksThrottleReasonHwSlowdown | + nvmlClocksThrottleReasonUnknown + ) + +class c_nvmlEventData_t(_PrintableStructure): + _fields_ = [ + ('device', c_nvmlDevice_t), + ('eventType', c_ulonglong), + ('eventData', c_ulonglong) + ] + _fmt_ = {'eventType': "0x%08X"} + +class c_nvmlAccountingStats_t(_PrintableStructure): + _fields_ = [ + ('gpuUtilization', c_uint), + ('memoryUtilization', c_uint), + ('maxMemoryUsage', c_ulonglong), + ('time', c_ulonglong), + ('startTime', c_ulonglong), + ('isRunning', c_uint), + ('reserved', c_uint * 5) + ] + +## C function wrappers ## +def nvmlInit(): + _LoadNvmlLibrary() + + # + # Initialize the library + # + fn = _nvmlGetFunctionPointer("nvmlInit_v2") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + _nvmlLib_refcount += 1 + libLoadLock.release() + return None + +def _LoadNvmlLibrary(): + ''' + Load the 
library if it isn't loaded already + ''' + global nvmlLib + + if (nvmlLib == None): + # lock to ensure only one caller loads the library + libLoadLock.acquire() + + try: + # ensure the library still isn't loaded + if (nvmlLib == None): + try: + if (sys.platform[:3] == "win"): + # cdecl calling convention + # load nvml.dll from %ProgramFiles%/NVIDIA Corporation/NVSMI/nvml.dll + nvmlLib = CDLL(os.path.join(os.getenv("ProgramFiles", "C:/Program Files"), "NVIDIA Corporation/NVSMI/nvml.dll")) + else: + # assume linux + nvmlLib = CDLL("libnvidia-ml.so.1") + except OSError as ose: + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + if (nvmlLib == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +def nvmlShutdown(): + # + # Leave the library loaded, but shutdown the interface + # + fn = _nvmlGetFunctionPointer("nvmlShutdown") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + if (0 < _nvmlLib_refcount): + _nvmlLib_refcount -= 1 + libLoadLock.release() + return None + +# Added in 2.285 +def nvmlErrorString(result): + fn = _nvmlGetFunctionPointer("nvmlErrorString") + fn.restype = c_char_p # otherwise return is an int + ret = fn(result) + return ret + +# Added in 2.285 +def nvmlSystemGetNVMLVersion(): + c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetProcessName(pid): + c_name = create_string_buffer(1024) + fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") + ret = fn(c_uint(pid), c_name, c_uint(1024)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlSystemGetDriverVersion(): + c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) + fn = 
_nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetHicVersion(): + c_count = c_uint(0) + hics = None + fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") + + # get the count + ret = fn(byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # if there are no hics + if (c_count.value == 0): + return [] + + hic_array = c_nvmlHwbcEntry_t * c_count.value + hics = hic_array() + ret = fn(byref(c_count), hics) + _nvmlCheckReturn(ret) + return hics + +## Unit get functions +def nvmlUnitGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetHandleByIndex(index): + c_index = c_uint(index) + unit = c_nvmlUnit_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") + ret = fn(c_index, byref(unit)) + _nvmlCheckReturn(ret) + return unit + +def nvmlUnitGetUnitInfo(unit): + c_info = c_nvmlUnitInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetLedState(unit): + c_state = c_nvmlLedState_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") + ret = fn(unit, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state + +def nvmlUnitGetPsuInfo(unit): + c_info = c_nvmlPSUInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetTemperature(unit, type): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") + ret = fn(unit, c_uint(type), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlUnitGetFanSpeedInfo(unit): + c_speeds = 
c_nvmlUnitFanSpeeds_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") + ret = fn(unit, byref(c_speeds)) + _nvmlCheckReturn(ret) + return c_speeds + +# added to API +def nvmlUnitGetDeviceCount(unit): + c_count = c_uint(0) + # query the unit to determine device count + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), None) + if (ret == NVML_ERROR_INSUFFICIENT_SIZE): + ret = NVML_SUCCESS + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetDevices(unit): + c_count = c_uint(nvmlUnitGetDeviceCount(unit)) + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return c_devices + +## Device get functions +def nvmlDeviceGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetHandleByIndex(index): + c_index = c_uint(index) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") + ret = fn(c_index, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleBySerial(serial): + c_serial = c_char_p(serial) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") + ret = fn(c_serial, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByUUID(uuid): + c_uuid = c_char_p(uuid) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") + ret = fn(c_uuid, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByPciBusId(pciBusId): + c_busId = c_char_p(pciBusId) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") + ret = fn(c_busId, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetName(handle): + c_name = 
create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") + ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlDeviceGetBoardId(handle): + c_id = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") + ret = fn(handle, byref(c_id)) + _nvmlCheckReturn(ret) + return c_id.value + +def nvmlDeviceGetMultiGpuBoard(handle): + c_multiGpu = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") + ret = fn(handle, byref(c_multiGpu)) + _nvmlCheckReturn(ret) + return c_multiGpu.value + +def nvmlDeviceGetBrand(handle): + c_type = _nvmlBrandType_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBrand") + ret = fn(handle, byref(c_type)) + _nvmlCheckReturn(ret) + return c_type.value + +def nvmlDeviceGetSerial(handle): + c_serial = create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") + ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_serial.value + +def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): + affinity_array = c_ulonglong * cpuSetSize + c_affinity = affinity_array() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") + ret = fn(handle, cpuSetSize, byref(c_affinity)) + _nvmlCheckReturn(ret) + return c_affinity + +def nvmlDeviceSetCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetMinorNumber(handle): + c_minor_number = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") + ret = fn(handle, byref(c_minor_number)) + _nvmlCheckReturn(ret) + return c_minor_number.value + +def nvmlDeviceGetUUID(handle): + c_uuid = 
create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") + ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_uuid.value + +def nvmlDeviceGetInforomVersion(handle, infoRomObject): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") + ret = fn(handle, _nvmlInforomObject_t(infoRomObject), + c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomImageVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomConfigurationChecksum(handle): + c_checksum = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") + ret = fn(handle, byref(c_checksum)) + _nvmlCheckReturn(ret) + return c_checksum.value + +# Added in 4.304 +def nvmlDeviceValidateInforom(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetDisplayMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetDisplayActive(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + + +def nvmlDeviceGetPersistenceMode(handle): + c_state = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") + ret = fn(handle, byref(c_state)) + _nvmlCheckReturn(ret) + return 
c_state.value + +def nvmlDeviceGetPciInfo(handle): + c_info = nvmlPciInfo_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") + ret = fn(handle, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlDeviceGetClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 2.285 +def nvmlDeviceGetMaxClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 5.319 +def nvmlDeviceGetDefaultApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetSupportedMemoryClocks(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +# Added in 4.304 +def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): + # first 
call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetFanSpeed(handle): + c_speed = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") + ret = fn(handle, byref(c_speed)) + _nvmlCheckReturn(ret) + return c_speed.value + +def nvmlDeviceGetTemperature(handle, sensor): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") + ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlDeviceGetTemperatureThreshold(handle, threshold): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") + ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +# DEPRECATED use nvmlDeviceGetPerformanceState +def nvmlDeviceGetPowerState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPerformanceState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPowerManagementMode(handle): + c_pcapMode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") + ret = 
fn(handle, byref(c_pcapMode)) + _nvmlCheckReturn(ret) + return c_pcapMode.value + +def nvmlDeviceGetPowerManagementLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +# Added in 4.304 +def nvmlDeviceGetPowerManagementLimitConstraints(handle): + c_minLimit = c_uint() + c_maxLimit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") + ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) + _nvmlCheckReturn(ret) + return [c_minLimit.value, c_maxLimit.value] + +# Added in 4.304 +def nvmlDeviceGetPowerManagementDefaultLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + + +# Added in 331 +def nvmlDeviceGetEnforcedPowerLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +def nvmlDeviceGetPowerUsage(handle): + c_watts = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") + ret = fn(handle, byref(c_watts)) + _nvmlCheckReturn(ret) + return c_watts.value + +# Added in 4.304 +def nvmlDeviceGetGpuOperationMode(handle): + c_currState = _nvmlGpuOperationMode_t() + c_pendingState = _nvmlGpuOperationMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# Added in 4.304 +def nvmlDeviceGetCurrentGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[0] + +# Added in 4.304 +def nvmlDeviceGetPendingGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[1] + +def nvmlDeviceGetMemoryInfo(handle): + c_memory = c_nvmlMemory_t() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") + ret = fn(handle, byref(c_memory)) + _nvmlCheckReturn(ret) + return c_memory + +def nvmlDeviceGetBAR1MemoryInfo(handle): + c_bar1_memory = c_nvmlBAR1Memory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") + ret = fn(handle, byref(c_bar1_memory)) + _nvmlCheckReturn(ret) + return c_bar1_memory + +def nvmlDeviceGetComputeMode(handle): + c_mode = _nvmlComputeMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetEccMode(handle): + c_currState = _nvmlEnableState_t() + c_pendingState = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# added to API +def nvmlDeviceGetCurrentEccMode(handle): + return nvmlDeviceGetEccMode(handle)[0] + +# added to API +def nvmlDeviceGetPendingEccMode(handle): + return nvmlDeviceGetEccMode(handle)[1] + +def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +# This is deprecated, instead use nvmlDeviceGetMemoryErrorCounter +def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): + c_counts = c_nvmlEccErrorCounts_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_counts)) + _nvmlCheckReturn(ret) + return c_counts + +# Added in 4.304 +def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") + ret = fn(handle, 
+ _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), + _nvmlMemoryLocation_t(locationType), + byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetUtilizationRates(handle): + c_util = c_nvmlUtilization_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") + ret = fn(handle, byref(c_util)) + _nvmlCheckReturn(ret) + return c_util + +def nvmlDeviceGetEncoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetDecoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetPcieReplayCounter(handle): + c_replay = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") + ret = fn(handle, byref(c_replay)) + _nvmlCheckReturn(ret) + return c_replay.value + +def nvmlDeviceGetDriverModel(handle): + c_currModel = _nvmlDriverModel_t() + c_pendingModel = _nvmlDriverModel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") + ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) + _nvmlCheckReturn(ret) + return [c_currModel.value, c_pendingModel.value] + +# added to API +def nvmlDeviceGetCurrentDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[0] + +# added to API +def nvmlDeviceGetPendingDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[1] + +# Added in 2.285 +def nvmlDeviceGetVbiosVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") + ret = fn(handle, c_version, 
c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlDeviceGetComputeRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetGraphicsRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, 
see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetAutoBoostedClocksEnabled(handle): + c_isEnabled = _nvmlEnableState_t() + c_defaultIsEnabled = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") + ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) + _nvmlCheckReturn(ret) + return [c_isEnabled.value, c_defaultIsEnabled.value] + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +## Set functions +def nvmlUnitSetLedState(unit, color): + fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") + ret = fn(unit, _nvmlLedColor_t(color)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetPersistenceMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetComputeMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") + ret = fn(handle, _nvmlComputeMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetEccMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearEccErrorCounts(handle, counterType): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") + ret = fn(handle, _nvmlEccCounterType_t(counterType)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetDriverModel(handle, model): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") + ret = fn(handle, _nvmlDriverModel_t(model)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if 
hardware doesn't support setting auto boosted clocks + +def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +# Added in 4.304 +def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") + ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceResetApplicationsClocks(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetPowerManagementLimit(handle, limit): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") + ret = fn(handle, c_uint(limit)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetGpuOperationMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") + ret = fn(handle, _nvmlGpuOperationMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlEventSetCreate(): + fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") + eventSet = c_nvmlEventSet_t() + ret = fn(byref(eventSet)) + _nvmlCheckReturn(ret) + return eventSet + +# Added in 2.285 +def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): + fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") + ret = fn(handle, c_ulonglong(eventTypes), eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlDeviceGetSupportedEventTypes(handle): + c_eventTypes = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") + ret = fn(handle, byref(c_eventTypes)) + _nvmlCheckReturn(ret) + return 
c_eventTypes.value + +# Added in 2.285 +# raises NVML_ERROR_TIMEOUT exception on timeout +def nvmlEventSetWait(eventSet, timeoutms): + fn = _nvmlGetFunctionPointer("nvmlEventSetWait") + data = c_nvmlEventData_t() + ret = fn(eventSet, byref(data), c_uint(timeoutms)) + _nvmlCheckReturn(ret) + return data + +# Added in 2.285 +def nvmlEventSetFree(eventSet): + fn = _nvmlGetFunctionPointer("nvmlEventSetFree") + ret = fn(eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 3.295 +def nvmlDeviceOnSameBoard(handle1, handle2): + fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") + onSameBoard = c_int() + ret = fn(handle1, handle2, byref(onSameBoard)) + _nvmlCheckReturn(ret) + return (onSameBoard.value != 0) + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 4.304 +def nvmlDeviceGetSupportedClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 4.304 +def nvmlDeviceGetCurrentClocksThrottleReasons(handle): + c_reasons= 
c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 5.319 +def nvmlDeviceGetIndex(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetIndex") + c_index = c_uint() + ret = fn(handle, byref(c_index)) + _nvmlCheckReturn(ret) + return c_index.value + +# Added in 5.319 +def nvmlDeviceGetAccountingMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceSetAccountingMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearAccountingPids(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetAccountingStats(handle, pid): + stats = c_nvmlAccountingStats_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats") + ret = fn(handle, c_uint(pid), byref(stats)) + _nvmlCheckReturn(ret) + if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + stats.maxMemoryUsage = None + return stats + +def nvmlDeviceGetAccountingPids(handle): + count = c_uint(nvmlDeviceGetAccountingBufferSize(handle)) + pids = (c_uint * count.value)() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids") + ret = fn(handle, byref(count), pids) + _nvmlCheckReturn(ret) + return map(int, pids[0:count.value]) + +def nvmlDeviceGetAccountingBufferSize(handle): + bufferSize = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize") + ret = fn(handle, byref(bufferSize)) + _nvmlCheckReturn(ret) + return int(bufferSize.value) + +def nvmlDeviceGetRetiredPages(device, sourceFilter): + c_source = 
_nvmlPageRetirementCause_t(sourceFilter) + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages") + + # First call will get the size + ret = fn(device, c_source, byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # call again with a buffer + # oversize the array for the rare cases where additional pages + # are retired between NVML calls + c_count.value = c_count.value * 2 + 5 + page_array = c_ulonglong * c_count.value + c_pages = page_array() + ret = fn(device, c_source, byref(c_count), c_pages) + _nvmlCheckReturn(ret) + return map(int, c_pages[0:c_count.value]) + +def nvmlDeviceGetRetiredPagesPendingStatus(device): + c_pending = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus") + ret = fn(device, byref(c_pending)) + _nvmlCheckReturn(ret) + return int(c_pending.value) + +def nvmlDeviceGetAPIRestriction(device, apiType): + c_permission = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction") + ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission)) + _nvmlCheckReturn(ret) + return int(c_permission.value) + +def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction") + ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetBridgeChipInfo(handle): + bridgeHierarchy = c_nvmlBridgeChipHierarchy_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo") + ret = fn(handle, byref(bridgeHierarchy)) + _nvmlCheckReturn(ret) + return bridgeHierarchy + +def nvmlDeviceGetSamples(device, sampling_type, timeStamp): + c_sampling_type = _nvmlSamplingType_t(sampling_type) + c_time_stamp = c_ulonglong(timeStamp) + c_sample_count = c_uint(0) + c_sample_value_type = _nvmlValueType_t() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetSamples") + + ## First Call gets the size + ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None) + + # Stop if this fails + if (ret != NVML_SUCCESS): + raise NVMLError(ret) + + sampleArray = c_sample_count.value * c_nvmlSample_t + c_samples = sampleArray() + ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples) + _nvmlCheckReturn(ret) + return (c_sample_value_type.value, c_samples[0:c_sample_count.value]) + +def nvmlDeviceGetViolationStatus(device, perfPolicyType): + c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType) + c_violTime = c_nvmlViolationTime_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus") + + ## Invoke the method to get violation time + ret = fn(device, c_perfPolicy_type, byref(c_violTime)) + _nvmlCheckReturn(ret) + return c_violTime + +def nvmlDeviceGetPcieThroughput(device, counter): + c_util = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput") + ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util)) + _nvmlCheckReturn(ret) + return c_util.value + +def nvmlSystemGetTopologyGpuSet(cpuNumber): + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet") + + # First call will get the size + ret = fn(cpuNumber, byref(c_count), None) + + if ret != NVML_SUCCESS: + raise NVMLError(ret) + print c_count.value + # call again with a buffer + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + ret = fn(cpuNumber, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return map(None, c_devices[0:c_count.value]) + +def nvmlDeviceGetTopologyNearestGpus(device, level): + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus") + + # First call will get the size + ret = fn(device, level, byref(c_count), None) + + if ret != NVML_SUCCESS: + raise NVMLError(ret) + + # call again with a buffer + 
device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + ret = fn(device, level, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return map(None, c_devices[0:c_count.value]) + +def nvmlDeviceGetTopologyCommonAncestor(device1, device2): + c_level = _nvmlGpuTopologyLevel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor") + ret = fn(device1, device2, byref(c_level)) + _nvmlCheckReturn(ret) + return c_level.value diff --git a/gpu/nvidia/nvidia-ml-py-3.295.00/setup.py b/gpu/nvidia/nvidia-ml-py-7.352.0/setup.py similarity index 89% rename from gpu/nvidia/nvidia-ml-py-3.295.00/setup.py rename to gpu/nvidia/nvidia-ml-py-7.352.0/setup.py index ab1eddee..b331c1c8 100644 --- a/gpu/nvidia/nvidia-ml-py-3.295.00/setup.py +++ b/gpu/nvidia/nvidia-ml-py-7.352.0/setup.py @@ -7,11 +7,13 @@ DistributionMetadata.classifiers = None DistributionMetadata.download_url = None -setup(name='nvidia-ml-py', - version='3.295.00', +_package_name='nvidia-ml-py' + +setup(name=_package_name, + version='7.352.0', description='Python Bindings for the NVIDIA Management Library', py_modules=['pynvml', 'nvidia_smi'], - package_data=['Example.txt'], + package_data={_package_name: ['Example.txt']}, license="BSD", url="http://www.nvidia.com/", author="NVIDIA Corporation",