forked from BugRoger/nvidia-exporter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
metrics.go
123 lines (102 loc) · 2.52 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package main
import (
	"fmt"
	"strconv"
	"time"

	"github.com/mindprince/gonvml"
)
// averageDuration is the look-back window handed to the device's
// AveragePowerUsage and AverageGPUUtilization queries in collectMetrics.
var averageDuration = 10 * time.Second
// Metrics is the result of one collectMetrics call: the driver version plus
// one Device entry per GPU that was enumerated.
type Metrics struct {
	// Version is the string returned by gonvml.SystemDriverVersion.
	Version string
	// Devices holds one entry per device index, appended in ascending index order.
	Devices []*Device
}
// Device holds the identifying labels and the readings that collectMetrics
// takes from a single GPU through gonvml.
//
// NOTE(review): numeric readings are only cast to float64, never scaled, so
// their units are whatever gonvml returns. The unit hints below follow the
// gonvml documentation and should be confirmed against it.
type Device struct {
	// Index is the enumeration index used with gonvml.DeviceHandleByIndex, as a decimal string.
	Index string
	// MinorNumber is the result of device.MinorNumber(), as a decimal string.
	MinorNumber string
	// Name is the result of device.Name().
	Name string
	// UUID is the result of device.UUID().
	UUID string
	// Temperature is the result of device.Temperature() (presumably degrees Celsius — confirm).
	Temperature float64
	// PowerUsage is the result of device.PowerUsage() (presumably milliwatts — confirm).
	PowerUsage float64
	// PowerUsageAverage is the result of device.AveragePowerUsage(averageDuration).
	PowerUsageAverage float64
	// MemoryTotal is the first value returned by device.MemoryInfo() (presumably bytes — confirm).
	MemoryTotal float64
	// MemoryUsed is the second value returned by device.MemoryInfo() (presumably bytes — confirm).
	MemoryUsed float64
	// UtilizationMemory is the second value returned by device.UtilizationRates() (presumably percent — confirm).
	UtilizationMemory float64
	// UtilizationGPU is the first value returned by device.UtilizationRates() (presumably percent — confirm).
	UtilizationGPU float64
	// UtilizationGPUAverage is the result of device.AverageGPUUtilization(averageDuration).
	UtilizationGPUAverage float64
}
// collectMetrics takes one snapshot of the NVIDIA driver version and of every
// GPU that gonvml enumerates.
//
// The library is initialized on entry and shut down again when the function
// returns, so each call is self-contained. Collection is all-or-nothing: the
// first failing query aborts the snapshot, and the function returns nil
// metrics together with an error that names the failing step (and the device
// index, where applicable) and wraps the underlying gonvml error.
func collectMetrics() (*Metrics, error) {
	if err := gonvml.Initialize(); err != nil {
		return nil, fmt.Errorf("initializing nvml: %w", err)
	}
	// The outcome of Shutdown is deliberately not inspected: a snapshot that
	// was read successfully stays valid even if teardown misbehaves.
	defer gonvml.Shutdown()

	version, err := gonvml.SystemDriverVersion()
	if err != nil {
		return nil, fmt.Errorf("reading driver version: %w", err)
	}

	numDevices, err := gonvml.DeviceCount()
	if err != nil {
		return nil, fmt.Errorf("counting devices: %w", err)
	}

	metrics := &Metrics{Version: version}
	for index := 0; index < int(numDevices); index++ {
		device, err := collectDeviceMetrics(index)
		if err != nil {
			// Add the index here so every per-query error identifies its GPU.
			return nil, fmt.Errorf("device %d: %w", index, err)
		}
		metrics.Devices = append(metrics.Devices, device)
	}
	return metrics, nil
}

// collectDeviceMetrics reads the identity and current readings of the GPU at
// the given enumeration index. It expects gonvml to be initialized already.
// On failure it returns nil and an error that names the query that failed and
// wraps the underlying gonvml error.
func collectDeviceMetrics(index int) (*Device, error) {
	device, err := gonvml.DeviceHandleByIndex(uint(index))
	if err != nil {
		return nil, fmt.Errorf("getting device handle: %w", err)
	}

	uuid, err := device.UUID()
	if err != nil {
		return nil, fmt.Errorf("reading uuid: %w", err)
	}

	name, err := device.Name()
	if err != nil {
		return nil, fmt.Errorf("reading name: %w", err)
	}

	minorNumber, err := device.MinorNumber()
	if err != nil {
		return nil, fmt.Errorf("reading minor number: %w", err)
	}

	temperature, err := device.Temperature()
	if err != nil {
		return nil, fmt.Errorf("reading temperature: %w", err)
	}

	powerUsage, err := device.PowerUsage()
	if err != nil {
		return nil, fmt.Errorf("reading power usage: %w", err)
	}

	powerUsageAverage, err := device.AveragePowerUsage(averageDuration)
	if err != nil {
		return nil, fmt.Errorf("reading average power usage: %w", err)
	}

	memoryTotal, memoryUsed, err := device.MemoryInfo()
	if err != nil {
		return nil, fmt.Errorf("reading memory info: %w", err)
	}

	utilizationGPU, utilizationMemory, err := device.UtilizationRates()
	if err != nil {
		return nil, fmt.Errorf("reading utilization rates: %w", err)
	}

	utilizationGPUAverage, err := device.AverageGPUUtilization(averageDuration)
	if err != nil {
		return nil, fmt.Errorf("reading average gpu utilization: %w", err)
	}

	return &Device{
		Index:                 strconv.Itoa(index),
		MinorNumber:           strconv.Itoa(int(minorNumber)),
		Name:                  name,
		UUID:                  uuid,
		Temperature:           float64(temperature),
		PowerUsage:            float64(powerUsage),
		PowerUsageAverage:     float64(powerUsageAverage),
		MemoryTotal:           float64(memoryTotal),
		MemoryUsed:            float64(memoryUsed),
		UtilizationMemory:     float64(utilizationMemory),
		UtilizationGPU:        float64(utilizationGPU),
		UtilizationGPUAverage: float64(utilizationGPUAverage),
	}, nil
}