From 528396be4b16abc8d3e18273edef721e0991a88e Mon Sep 17 00:00:00 2001
From: Aritas1
Date: Sun, 20 Aug 2023 16:56:09 +0200
Subject: [PATCH] extend self-test log processing

---
 metrics.go  | 11 +++++++++++
 readjson.go |  2 +-
 smartctl.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/metrics.go b/metrics.go
index 91c16a1..fb25b66 100644
--- a/metrics.go
+++ b/metrics.go
@@ -242,6 +242,17 @@ var (
 		},
 		nil,
 	)
+	metricDeviceSelfTest = prometheus.NewDesc(
+		"smartctl_device_self_test_log_seconds",
+		"Device SMART self test log execution lifetime seconds",
+		[]string{
+			"device",
+			"self_test_log_type",
+			"self_test_passed",
+		},
+		nil,
+	)
+
 	metricDeviceSelfTestLogCount = prometheus.NewDesc(
 		"smartctl_device_self_test_log_count",
 		"Device SMART self test log count",
diff --git a/readjson.go b/readjson.go
index 4b7d4ca..501f66b 100644
--- a/readjson.go
+++ b/readjson.go
@@ -64,7 +64,7 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
 // Get json from smartctl and parse it
 func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
 	level.Debug(logger).Log("msg", "Collecting S.M.A.R.T. counters", "device", device)
-	out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
+	out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", "--log=selftest", device).Output()
 	if err != nil {
 		level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
 	}
diff --git a/smartctl.go b/smartctl.go
index 792d0c7..b7e27af 100644
--- a/smartctl.go
+++ b/smartctl.go
@@ -15,6 +15,7 @@ package main
 
 import (
 	"fmt"
+	"strconv"
 	"strings"
 
 	"github.com/go-kit/log"
@@ -69,6 +70,7 @@ func (smart *SMARTctl) Collect() {
 	smart.mineDeviceSCTStatus()
 	smart.mineDeviceStatistics()
 	smart.mineDeviceErrorLog()
+	smart.mineDeviceSelfTest()
 	smart.mineDeviceSelfTestLog()
 	smart.mineDeviceERC()
 	smart.minePercentageUsed()
@@ -399,6 +401,54 @@ func (smart *SMARTctl) mineDeviceErrorLog() {
 	}
 }
 
+// mineDeviceSelfTest exports the most recent finished self-test of every
+// test type in the ATA self-test log. The sample value is the entry's
+// lifetime_hours converted to seconds; the labels carry the device name,
+// the test type and whether the test passed.
+func (smart *SMARTctl) mineDeviceSelfTest() {
+	validTypes := map[int]string{
+		255: "vendor",
+		129: "short_captive",
+		2:   "long",
+		1:   "short",
+	}
+
+	// Only the first usable entry of each type is exported, which assumes
+	// the table always lists the newest test first.
+	processedTypes := make(map[string]bool)
+
+	for _, logEntry := range smart.json.Get("ata_smart_self_test_log.standard.table").Array() {
+		// The upper nibble of the ATA self-test execution status byte is the
+		// result code; the lower nibble is the remaining progress.
+		switch logEntry.Get("status.value").Int() >> 4 {
+		case 0x1, 0x2, 0xF:
+			// Aborted by host, interrupted by reset, or still running: there
+			// is no result to report. Failed tests also have a non-zero
+			// status and must be kept, or self_test_passed is never "false".
+			continue
+		}
+
+		logTestType, exists := validTypes[int(logEntry.Get("type.value").Int())]
+		if !exists {
+			logTestType = "unknown"
+		}
+		if processedTypes[logTestType] {
+			continue
+		}
+		processedTypes[logTestType] = true
+
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricDeviceSelfTest,
+			prometheus.GaugeValue,
+			// lifetime_hours is in hours; the metric sticks with seconds.
+			float64(logEntry.Get("lifetime_hours").Int())*60*60,
+			smart.device.device,
+			logTestType,
+			strconv.FormatBool(logEntry.Get("status.passed").Bool()),
+		)
+	}
+}
+
 func (smart *SMARTctl) mineDeviceSelfTestLog() {
 	for logType, status := range smart.json.Get("ata_smart_self_test_log").Map() {
 		smart.ch <- prometheus.MustNewConstMetric(